Merge branch 'converter_export' into smart_staging

2011-07-18 04:12:28 -04:00
parent 082c3e699c a0297d1053
commit 6cefef5185
40 changed files with 1138 additions and 550 deletions
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ Open source, Django based document manager with custom metadata indexing, file s

 [Website](http://bit.ly/mayan-edms)

-Requirements
+Basic requirements
 ---

 Python:
@@ -15,6 +15,21 @@ Python:
 * django-filetransfers - File upload/download abstraction
 * celery- asynchronous task queue/job queue based on distributed message passing
 * django-celery - celery Django integration
+* django-mptt - Utilities for implementing a modified pre-order traversal tree in django
+* python-magic - A python wrapper for libmagic
+* django-taggit - Simple tagging for django
+* slate - The simplest way to extract text from PDFs in Python
+
+
+Execute `pip install -r requirements/production.txt` to install the Python/Django dependencies automatically.
+
+Executables:
+
+* tesseract-ocr - An OCR Engine that was developed at HP Labs between 1985 and 1995... and now at Google.
+* unpaper - post-processing scanned and photocopied book pages
+
+Optional requirements
+---

 For the GridFS storage backend:

@@ -22,13 +37,12 @@ For the GridFS storage backend:
 * GridFS - a storage specification for large objects in MongoDB
 * MongoDB - a scalable, open source, document-oriented database

-Or execute pip install -r requirements/production.txt to install the dependencies automatically.
+Libraries:

-Executables:
+* libmagic - MIME detection library; if it is not installed, Mayan will fall back to Python's simpler built-in mimetypes library
+
+Mayan has the ability to switch between different image conversion backends; at the moment these two are supported:

-* libmagic - MIME detection library
-* tesseract-ocr - An OCR Engine that was developed at HP Labs between 1985 and 1995... and now at Google.
-* unpaper - post-processing scanned and photocopied book pages
 * ImageMagick - Convert, Edit, Or Compose Bitmap Images
 * GraphicMagick - Robust collection of tools and libraries to read, write, and manipulate an image.

--- a/apps/converter/init.py
+++ b/apps/converter/init.py
@@ -1,11 +1,16 @@
 from django.utils.translation import ugettext_lazy as _
+from django.core.exceptions import ImproperlyConfigured

 from navigation.api import register_sidebar_template

-TRANFORMATION_CHOICES = {
-    u'rotate': u'-rotate %(degrees)d'
-}
+from converter.utils import load_backend
+from converter.conf.settings import GRAPHICS_BACKEND

 formats_list = {'text': _('file formats'), 'view': 'formats_list', 'famfam': 'pictures'}

 register_sidebar_template(['formats_list'], 'converter_file_formats_help.html')
+
+try:
+    backend = load_backend().ConverterClass()
+except ImproperlyConfigured:
+    raise ImproperlyConfigured(u'Missing or incorrect converter backend: %s' % GRAPHICS_BACKEND)
--- a/apps/converter/api.py
+++ b/apps/converter/api.py
@@ -1,66 +1,29 @@
 import os
 import subprocess
-
-from django.utils.importlib import import_module
-from django.template.defaultfilters import slugify
-
-from converter.conf.settings import UNPAPER_PATH
-from converter.conf.settings import OCR_OPTIONS
-from converter.conf.settings import DEFAULT_OPTIONS
-from converter.conf.settings import LOW_QUALITY_OPTIONS
-from converter.conf.settings import HIGH_QUALITY_OPTIONS
-from converter.conf.settings import PRINT_QUALITY_OPTIONS
-from converter.conf.settings import GRAPHICS_BACKEND
-from converter.conf.settings import UNOCONV_PATH
-
-from converter.exceptions import UnpaperError, OfficeConversionError
+import hashlib

 from common import TEMPORARY_DIRECTORY
 from documents.utils import document_save_to_temp_dir

-DEFAULT_ZOOM_LEVEL = 100
-DEFAULT_ROTATION = 0
-DEFAULT_PAGE_INDEX_NUMBER = 0
-DEFAULT_FILE_FORMAT = u'jpg'
-DEFAULT_OCR_FILE_FORMAT = u'tif'
+from converter.conf.settings import UNOCONV_PATH
+from converter.exceptions import UnpaperError, OfficeConversionError
+from converter.literals import DEFAULT_PAGE_NUMBER, \
+    DEFAULT_OCR_FILE_FORMAT, QUALITY_DEFAULT, DEFAULT_ZOOM_LEVEL, \
+    DEFAULT_ROTATION, DEFAULT_FILE_FORMAT, QUALITY_HIGH

-QUALITY_DEFAULT = u'quality_default'
-QUALITY_LOW = u'quality_low'
-QUALITY_HIGH = u'quality_high'
-QUALITY_PRINT = u'quality_print'
-
-QUALITY_SETTINGS = {
-    QUALITY_DEFAULT: DEFAULT_OPTIONS,
-    QUALITY_LOW: LOW_QUALITY_OPTIONS,
-    QUALITY_HIGH: HIGH_QUALITY_OPTIONS,
-    QUALITY_PRINT: PRINT_QUALITY_OPTIONS
-}
+from converter import backend
+from converter.literals import TRANSFORMATION_CHOICES
+from converter.literals import TRANSFORMATION_RESIZE, \
+    TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \
+    TRANSFORMATION_ZOOM
+from converter.literals import DIMENSION_SEPARATOR    

+HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest()
+    
 CONVERTER_OFFICE_FILE_EXTENSIONS = [
    u'ods', u'docx', u'doc'
 ]

-
-def _lazy_load(fn):
-    _cached = []
-
-    def _decorated():
-        if not _cached:
-            _cached.append(fn())
-        return _cached[0]
-    return _decorated
-
-
-@_lazy_load
-def _get_backend():
-    return import_module(GRAPHICS_BACKEND)
-
-try:
-    backend = _get_backend()
-except ImportError:
-    raise ImportError(u'Missing or incorrect converter backend: %s' % GRAPHICS_BACKEND)
-
-
 def cleanup(filename):
    """
    Tries to remove the given filename. Ignores non-existent files
@@ -71,21 +34,6 @@ def cleanup(filename):
        pass


-def execute_unpaper(input_filepath, output_filepath):
-    """
-    Executes the program unpaper using subprocess's Popen
-    """
-    command = []
-    command.append(UNPAPER_PATH)
-    command.append(u'--overwrite')
-    command.append(input_filepath)
-    command.append(output_filepath)
-    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE)
-    return_code = proc.wait()
-    if return_code != 0:
-        raise UnpaperError(proc.stderr.readline())
-
-
 def execute_unoconv(input_filepath, arguments=''):
    """
    Executes the program unoconv using subprocess's Popen
@@ -109,19 +57,11 @@ def cache_cleanup(input_filepath, *args, **kwargs):

 def create_image_cache_filename(input_filepath, *args, **kwargs):
    if input_filepath:
-        temp_filename, separator = os.path.splitext(os.path.basename(input_filepath))
-        temp_path = os.path.join(TEMPORARY_DIRECTORY, temp_filename)
-
-        final_filepath = []
-        [final_filepath.append(str(arg)) for arg in args]
-        final_filepath.extend([u'%s_%s' % (key, value) for key, value in kwargs.items()])
-
-        temp_path += slugify(u'_'.join(final_filepath))
-
-        return temp_path
+        hash_value = HASH_FUNCTION(u''.join([input_filepath, unicode(args), unicode(kwargs)]))
+        return os.path.join(TEMPORARY_DIRECTORY, hash_value)
    else:
        return None
-
+        

 def convert_office_document(input_filepath):
    if os.path.exists(UNOCONV_PATH):
@@ -138,15 +78,14 @@ def convert_document(document, *args, **kwargs):
    return convert(document_save_to_temp_dir(document, document.checksum), *args, **kwargs)


-def convert(input_filepath, *args, **kwargs):
+def convert(input_filepath, cleanup_files=True, *args, **kwargs):
    size = kwargs.get('size')
    file_format = kwargs.get('file_format', DEFAULT_FILE_FORMAT)
-    extra_options = kwargs.get('extra_options', u'')
    zoom = kwargs.get('zoom', DEFAULT_ZOOM_LEVEL)
    rotation = kwargs.get('rotation', DEFAULT_ROTATION)
-    page = kwargs.get('page', DEFAULT_PAGE_INDEX_NUMBER)
-    cleanup_files = kwargs.get('cleanup_files', True)
+    page = kwargs.get('page', DEFAULT_PAGE_NUMBER)
    quality = kwargs.get('quality', QUALITY_DEFAULT)
+    transformations = kwargs.get('transformations', [])

    unoconv_output = None

@@ -160,20 +99,32 @@ def convert(input_filepath, *args, **kwargs):
        if result:
            unoconv_output = result
            input_filepath = result
-            extra_options = u''

-    input_arg = u'%s[%s]' % (input_filepath, page)
-    extra_options += u' -resize %s' % size
+    transformations.append(
+        {
+            'transformation': TRANSFORMATION_RESIZE,
+            'arguments': dict(zip([u'width', u'height'], size.split(DIMENSION_SEPARATOR)))
+        }
+    )
+
    if zoom != 100:
-        extra_options += u' -resize %d%% ' % zoom
+        transformations.append(
+            {
+                'transformation': TRANSFORMATION_ZOOM,
+                'arguments': {'percent': zoom}
+            }
+        )        

    if rotation != 0 and rotation != 360:
-        extra_options += u' -rotate %d ' % rotation
+        transformations.append(
+            {
+                'transformation': TRANSFORMATION_ROTATE,
+                'arguments': {'degrees': rotation}
+            }
+        )           

-    if format == u'jpg':
-        extra_options += u' -quality 85'
    try:
-        backend.execute_convert(input_filepath=input_arg, arguments=extra_options, output_filepath=u'%s:%s' % (file_format, output_filepath), quality=quality)
+        backend.convert_file(input_filepath=input_filepath, output_filepath=output_filepath, quality=quality, transformations=transformations, page=page, file_format=file_format)
    finally:
        if cleanup_files:
            cleanup(input_filepath)
@@ -184,51 +135,22 @@ def convert(input_filepath, *args, **kwargs):


 def get_page_count(input_filepath):
-    try:
-        return len(backend.execute_identify(unicode(input_filepath)).splitlines())
-    except:
-        #TODO: send to other page number identifying program
-        return 1
+    return backend.get_page_count(input_filepath)


 def get_document_dimensions(document, *args, **kwargs):
    document_filepath = create_image_cache_filename(document.checksum, *args, **kwargs)
    if os.path.exists(document_filepath):
        options = [u'-format', u'%w %h']
-        return [int(dimension) for dimension in backend.execute_identify(unicode(document_filepath), options).split()]
+        return [int(dimension) for dimension in backend.identify_file(unicode(document_filepath), options).split()]
    else:
        return [0, 0]


-def convert_document_for_ocr(document, page=DEFAULT_PAGE_INDEX_NUMBER, file_format=DEFAULT_OCR_FILE_FORMAT):
-    #Extract document file
-    input_filepath = document_save_to_temp_dir(document, document.uuid)
-
-    #Convert for OCR
-    temp_filename, separator = os.path.splitext(os.path.basename(input_filepath))
-    temp_path = os.path.join(TEMPORARY_DIRECTORY, temp_filename)
-    transformation_output_file = u'%s_trans%s%s%s' % (temp_path, page, os.extsep, file_format)
-    unpaper_input_file = u'%s_unpaper_in%s%spnm' % (temp_path, page, os.extsep)
-    unpaper_output_file = u'%s_unpaper_out%s%spnm' % (temp_path, page, os.extsep)
-    convert_output_file = u'%s_ocr%s%s%s' % (temp_path, page, os.extsep, file_format)
-
-    input_arg = u'%s[%s]' % (input_filepath, page)
-
-    try:
-        document_page = document.documentpage_set.get(page_number=page + 1)
-        transformation_string, warnings = document_page.get_transformation_string()
-
-        #Apply default transformations
-        backend.execute_convert(input_filepath=input_arg, quality=QUALITY_HIGH, arguments=transformation_string, output_filepath=transformation_output_file)
-        #Do OCR operations
-        backend.execute_convert(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file)
-        # Process by unpaper
-        execute_unpaper(input_filepath=unpaper_input_file, output_filepath=unpaper_output_file)
-        # Convert to tif
-        backend.execute_convert(input_filepath=unpaper_output_file, output_filepath=convert_output_file)
-    finally:
-        cleanup(transformation_output_file)
-        cleanup(unpaper_input_file)
-        cleanup(unpaper_output_file)
-
-    return convert_output_file
+def get_available_transformations_choices():
+    result = []
+    for transformation in backend.get_available_transformations():
+        transformation_template = u'%s %s' % (TRANSFORMATION_CHOICES[transformation]['label'], u','.join(['<%s>' % argument['name'] if argument['required'] else '[%s]' % argument['name'] for argument in TRANSFORMATION_CHOICES[transformation]['arguments']]))
+        result.append([transformation, transformation_template])
+        
+    return result
--- a/apps/converter/backends/init.py
+++ b/apps/converter/backends/init.py
@@ -0,0 +1,18 @@
+class ConverterBase(object):
+    """
+    Base class that all backend classes must inherit
+    """
+    def convert_file(self, input_filepath, *args, **kwargs):
+        raise NotImplementedError("Your %s class has not defined a convert_file() method, which is required." % self.__class__.__name__)
+
+    def convert_document(self, document, *args, **kwargs):
+        raise NotImplementedError("Your %s class has not defined a convert_document() method, which is required." % self.__class__.__name__)
+
+    def get_format_list(self):
+        raise NotImplementedError("Your %s class has not defined a get_format_list() method, which is required." % self.__class__.__name__)
+
+    def get_available_transformations(self):
+        raise NotImplementedError("Your %s class has not defined a get_available_transformations() method, which is required." % self.__class__.__name__)
+   
+    def get_page_count(self):
+        raise NotImplementedError("Your %s class has not defined a get_page_count() method, which is required." % self.__class__.__name__)
--- a/apps/converter/backends/graphicsmagick.py
+++ b/apps/converter/backends/graphicsmagick.py
@@ -1,71 +0,0 @@
-import subprocess
-import re
-
-from converter.conf.settings import GM_PATH
-from converter.conf.settings import GM_SETTINGS
-from converter.api import QUALITY_DEFAULT, QUALITY_SETTINGS
-from converter.exceptions import ConvertError, UnknownFormat, IdentifyError
-
-CONVERTER_ERROR_STRING_NO_DECODER = u'No decode delegate for this image format'
-CONVERTER_ERROR_STARTS_WITH = u'starts with'
-
-
-def execute_identify(input_filepath, arguments=None):
-    command = []
-    command.append(unicode(GM_PATH))
-    command.append(u'identify')
-    if arguments:
-        command.extend(arguments)
-    command.append(unicode(input_filepath))
-    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
-    return_code = proc.wait()
-    if return_code != 0:
-        raise IdentifyError(proc.stderr.readline())
-    return proc.stdout.read()
-
-
-def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None):
-    command = []
-    command.append(unicode(GM_PATH))
-    command.append(u'convert')
-    command.extend(unicode(QUALITY_SETTINGS[quality]).split())
-    command.extend(unicode(GM_SETTINGS).split())
-    command.append(unicode(input_filepath))
-    if arguments:
-        command.extend(unicode(arguments).split())
-    command.append(unicode(output_filepath))
-    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
-    return_code = proc.wait()
-    if return_code != 0:
-        #Got an error from convert program
-        error_line = proc.stderr.readline()
-        if (CONVERTER_ERROR_STRING_NO_DECODER in error_line) or (CONVERTER_ERROR_STARTS_WITH in error_line):
-            #Try to determine from error message which class of error is it
-            raise UnknownFormat
-        else:
-            raise ConvertError(error_line)
-
-
-def get_format_list():
-    """
-    Call GraphicsMagick to parse all of it's supported file formats, and
-    return a list of the names and descriptions
-    """
-    format_regex = re.compile(' *([A-Z0-9]+)[*]? +([A-Z0-9]+) +([rw\-+]+) *(.*).*')    
-    formats = []
-    command = []
-    command.append(unicode(GM_PATH))
-    command.append(u'convert')
-    command.append(u'-list')
-    command.append(u'formats')
-    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
-    return_code = proc.wait()
-    if return_code != 0:
-        raise ConvertError(proc.stderr.readline())
-    
-    for line in proc.stdout.readlines():
-        fields = format_regex.findall(line)
-        if fields:
-            formats.append((fields[0][0], fields[0][3]))
-    
-    return formats
--- a/apps/converter/backends/graphicsmagick/init.py
+++ b/apps/converter/backends/graphicsmagick/init.py
--- a/apps/converter/backends/graphicsmagick/base.py
+++ b/apps/converter/backends/graphicsmagick/base.py
@@ -0,0 +1,119 @@
+import subprocess
+import re
+
+from converter.conf.settings import GM_PATH
+from converter.conf.settings import GM_SETTINGS
+from converter.literals import QUALITY_DEFAULT, QUALITY_SETTINGS
+from converter.exceptions import ConvertError, UnknownFormat, \
+    IdentifyError
+from converter.backends import ConverterBase
+from converter.literals import TRANSFORMATION_RESIZE, \
+    TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \
+    TRANSFORMATION_ZOOM
+from converter.literals import DIMENSION_SEPARATOR, DEFAULT_PAGE_NUMBER, \
+    DEFAULT_FILE_FORMAT
+
+CONVERTER_ERROR_STRING_NO_DECODER = u'No decode delegate for this image format'
+CONVERTER_ERROR_STARTS_WITH = u'starts with'
+
+
+class ConverterClass(ConverterBase):
+    def identify_file(self, input_filepath, arguments=None):
+        command = []
+        command.append(unicode(GM_PATH))
+        command.append(u'identify')
+        if arguments:
+            command.extend(arguments)
+        command.append(unicode(input_filepath))
+        proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+        return_code = proc.wait()
+        if return_code != 0:
+            raise IdentifyError(proc.stderr.readline())
+        return proc.stdout.read()
+
+    def convert_file(self, input_filepath, output_filepath, transformations=None, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT):
+        arguments = []
+        if transformations:
+            for transformation in transformations:
+                if transformation['transformation'] == TRANSFORMATION_RESIZE:
+                    dimensions = []
+                    dimensions.append(unicode(transformation['arguments']['width']))
+                    if 'height' in transformation['arguments']:
+                        dimensions.append(unicode(transformation['arguments']['height']))
+                    arguments.append(u'-resize')
+                    arguments.append(u'%s' % DIMENSION_SEPARATOR.join(dimensions))
+
+                elif transformation['transformation'] == TRANSFORMATION_ZOOM:
+                    arguments.append(u'-resize')
+                    arguments.append(u'%d%%' % transformation['arguments']['percent'])
+
+                elif transformation['transformation'] == TRANSFORMATION_ROTATE:
+                    arguments.append(u'-rotate')
+                    arguments.append(u'%s' % transformation['arguments']['degrees'])
+
+        if format == u'jpeg':
+            arguments.append(u'-quality')
+            arguments.append(u'85')
+
+        # GraphicsMagick page numbers are 0-based
+        input_arg = u'%s[%d]' % (input_filepath, page - 1)
+
+        # Specify the file format next to the output filename
+        output_filepath = u'%s:%s' % (file_format, output_filepath)
+
+        command = []
+        command.append(unicode(GM_PATH))
+        command.append(u'convert')
+        command.extend(unicode(QUALITY_SETTINGS[quality]).split())
+        command.extend(unicode(GM_SETTINGS).split())
+        command.append(unicode(input_arg))
+        if arguments:
+            command.extend(arguments)
+        command.append(unicode(output_filepath))
+        proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+        return_code = proc.wait()
+        if return_code != 0:
+            #Got an error from convert program
+            error_line = proc.stderr.readline()
+            if (CONVERTER_ERROR_STRING_NO_DECODER in error_line) or (CONVERTER_ERROR_STARTS_WITH in error_line):
+                #Try to determine from error message which class of error is it
+                raise UnknownFormat
+            else:
+                raise ConvertError(error_line)
+
+    def get_format_list(self):
+        """
+        Call GraphicsMagick to parse all of its supported file formats, and
+        return a list of the names and descriptions
+        """
+        format_regex = re.compile(' *([A-Z0-9]+)[*]? +([A-Z0-9]+) +([rw\-+]+) *(.*).*')
+        formats = []
+        command = []
+        command.append(unicode(GM_PATH))
+        command.append(u'convert')
+        command.append(u'-list')
+        command.append(u'formats')
+        proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+        return_code = proc.wait()
+        if return_code != 0:
+            raise ConvertError(proc.stderr.readline())
+
+        for line in proc.stdout.readlines():
+            fields = format_regex.findall(line)
+            if fields:
+                formats.append((fields[0][0], fields[0][3]))
+
+        return formats
+
+    def get_available_transformations(self):
+        return [
+            TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE, \
+            TRANSFORMATION_ZOOM
+        ]
+
+    def get_page_count(self, input_filepath):
+        try:
+            return len(self.identify_file(unicode(input_filepath)).splitlines())
+        except:
+            #TODO: send to other page number identifying program
+            return 1
--- a/apps/converter/backends/imagemagick.py
+++ b/apps/converter/backends/imagemagick.py
@@ -1,68 +0,0 @@
-import subprocess
-import re
-
-from converter.conf.settings import IM_IDENTIFY_PATH
-from converter.conf.settings import IM_CONVERT_PATH
-from converter.api import QUALITY_DEFAULT, QUALITY_SETTINGS
-from converter.exceptions import ConvertError, UnknownFormat, \
-    IdentifyError
-
-CONVERTER_ERROR_STRING_NO_DECODER = u'no decode delegate for this image format'
-
-
-def execute_identify(input_filepath, arguments=None):
-    command = []
-    command.append(unicode(IM_IDENTIFY_PATH))
-    if arguments:
-        command.extend(arguments)
-    command.append(unicode(input_filepath))
-
-    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
-    return_code = proc.wait()
-    if return_code != 0:
-        raise IdentifyError(proc.stderr.readline())
-    return proc.stdout.read()
-
-
-def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None):
-    command = []
-    command.append(unicode(IM_CONVERT_PATH))
-    command.extend(unicode(QUALITY_SETTINGS[quality]).split())
-    command.append(unicode(input_filepath))
-    if arguments:
-        command.extend(unicode(arguments).split())
-    command.append(unicode(output_filepath))
-    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
-    return_code = proc.wait()
-    if return_code != 0:
-        #Got an error from convert program
-        error_line = proc.stderr.readline()
-        if CONVERTER_ERROR_STRING_NO_DECODER in error_line:
-            #Try to determine from error message which class of error is it
-            raise UnknownFormat
-        else:
-            raise ConvertError(error_line)
-
-
-def get_format_list():
-    """
-    Call ImageMagick to parse all of it's supported file formats, and
-    return a list of the names and descriptions
-    """
-    format_regex = re.compile(' *([A-Z0-9]+)[*]? +([A-Z0-9]+) +([rw\-+]+) *(.*).*')
-    formats = []
-    command = []
-    command.append(unicode(IM_CONVERT_PATH))
-    command.append(u'-list')
-    command.append(u'format')
-    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
-    return_code = proc.wait()
-    if return_code != 0:
-        raise ConvertError(proc.stderr.readline())
-    
-    for line in proc.stdout.readlines():
-        fields = format_regex.findall(line)
-        if fields:
-            formats.append((fields[0][0], fields[0][3]))
-    
-    return formats
--- a/apps/converter/backends/imagemagick/init.py
+++ b/apps/converter/backends/imagemagick/init.py
--- a/apps/converter/backends/imagemagick/base.py
+++ b/apps/converter/backends/imagemagick/base.py
@@ -0,0 +1,118 @@
+import subprocess
+import re
+
+from converter.conf.settings import IM_IDENTIFY_PATH
+from converter.conf.settings import IM_CONVERT_PATH
+from converter.literals import QUALITY_DEFAULT, QUALITY_SETTINGS
+from converter.exceptions import ConvertError, UnknownFormat, \
+    IdentifyError
+from converter.backends import ConverterBase
+from converter.literals import TRANSFORMATION_RESIZE, \
+    TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \
+    TRANSFORMATION_ZOOM
+from converter.literals import DIMENSION_SEPARATOR, DEFAULT_PAGE_NUMBER, \
+    DEFAULT_FILE_FORMAT
+    
+CONVERTER_ERROR_STRING_NO_DECODER = u'no decode delegate for this image format'
+
+
+class ConverterClass(ConverterBase):
+    def identify_file(self, input_filepath, arguments=None):
+        command = []
+        command.append(unicode(IM_IDENTIFY_PATH))
+        if arguments:
+            command.extend(arguments)
+        command.append(unicode(input_filepath))
+
+        proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+        return_code = proc.wait()
+        if return_code != 0:
+            raise IdentifyError(proc.stderr.readline())
+        return proc.stdout.read()
+
+    def convert_file(self, input_filepath, output_filepath, transformations=None, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT):
+        arguments = []
+        if transformations:
+            for transformation in transformations:
+                if transformation['transformation'] == TRANSFORMATION_RESIZE:
+                    dimensions = []
+                    dimensions.append(unicode(transformation['arguments']['width']))
+                    if 'height' in transformation['arguments']:
+                        dimensions.append(unicode(transformation['arguments']['height']))                    
+                    arguments.append(u'-resize')
+                    arguments.append(u'%s' % DIMENSION_SEPARATOR.join(dimensions))
+
+                elif transformation['transformation'] == TRANSFORMATION_ZOOM:
+                    arguments.append(u'-resize')
+                    arguments.append(u'%d%%' % transformation['arguments']['percent'])
+                    
+                elif transformation['transformation'] == TRANSFORMATION_ROTATE:
+                    arguments.append(u'-rotate')
+                    arguments.append(u'%s' % transformation['arguments']['degrees'])
+                    
+        if format == u'jpeg':
+            arguments.append(u'-quality')
+            arguments.append(u'85')
+        
+        # ImageMagick page numbers are 0-based
+        input_arg = u'%s[%d]' % (input_filepath, page - 1)
+
+        # Specify the file format next to the output filename
+        output_filepath = u'%s:%s' % (file_format, output_filepath)
+                  
+        command = []
+        command.append(unicode(IM_CONVERT_PATH))
+        command.extend(unicode(QUALITY_SETTINGS[quality]).split())
+        command.append(unicode(input_arg))
+        if arguments:
+            command.extend(arguments)
+        command.append(unicode(output_filepath))
+        proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+        return_code = proc.wait()
+        if return_code != 0:
+            #Got an error from convert program
+            error_line = proc.stderr.readline()
+            if CONVERTER_ERROR_STRING_NO_DECODER in error_line:
+                #Try to determine from error message which class of error is it
+                raise UnknownFormat
+            else:
+                raise ConvertError(error_line)
+
+
+    def get_format_list(self):
+        """
+        Call ImageMagick to parse all of its supported file formats, and
+        return a list of the names and descriptions
+        """
+        format_regex = re.compile(' *([A-Z0-9]+)[*]? +([A-Z0-9]+) +([rw\-+]+) *(.*).*')
+        formats = []
+        command = []
+        command.append(unicode(IM_CONVERT_PATH))
+        command.append(u'-list')
+        command.append(u'format')
+        proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+        return_code = proc.wait()
+        if return_code != 0:
+            raise ConvertError(proc.stderr.readline())
+        
+        for line in proc.stdout.readlines():
+            fields = format_regex.findall(line)
+            if fields:
+                formats.append((fields[0][0], fields[0][3]))
+        
+        return formats
+
+
+    def get_available_transformations(self):
+        return [
+            TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE, \
+            TRANSFORMATION_ZOOM
+        ]
+
+
+    def get_page_count(self, input_filepath):
+        try:
+            return len(self.identify_file(unicode(input_filepath)).splitlines())
+        except:
+            #TODO: send to other page number identifying program
+            return 1
--- a/apps/converter/backends/python/init.py
+++ b/apps/converter/backends/python/init.py
@@ -0,0 +1,3 @@
+from PIL import Image
+
+Image.init()
--- a/apps/converter/backends/python/base.py
+++ b/apps/converter/backends/python/base.py
@@ -0,0 +1,93 @@
+import slate
+from PIL import Image
+
+from django.utils.translation import ugettext_lazy as _
+
+from converter.literals import QUALITY_DEFAULT, QUALITY_SETTINGS
+from converter.exceptions import ConvertError, UnknownFormat, IdentifyError
+from converter.backends import ConverterBase
+from converter.literals import TRANSFORMATION_RESIZE, \
+    TRANSFORMATION_ROTATE, TRANSFORMATION_ZOOM
+from converter.literals import QUALITY_DEFAULT, DEFAULT_PAGE_NUMBER, \
+    DEFAULT_FILE_FORMAT
+from converter.utils import get_mimetype
+
+
+class ConverterClass(ConverterBase):
+    def get_page_count(self, input_filepath):
+        page_count = 1
+        
+        mimetype, encoding = get_mimetype(input_filepath)
+        if mimetype == 'application/pdf':
+            # If file is a PDF open it with slate to determine the page
+            # count
+            with open(input_filepath) as fd:
+                pages = slate.PDF(fd)
+            return len(pages)
+            
+        try:
+            im = Image.open(input_filepath)
+        except IOError:  #cannot identify image file
+            # Return a page count of 1, to at least allow the document
+            # to be created
+            return 1
+            
+        try:
+            while 1:
+                im.seek(im.tell()+1)
+                page_count += 1
+                # do something to im
+        except EOFError:
+            pass # end of sequence
+            
+        return page_count
+    
+    def convert_file(self, input_filepath, output_filepath, transformations=None, quality=QUALITY_DEFAULT, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT):
+        try:
+            im = Image.open(input_filepath)
+        except Exception: # Python Imaging Library doesn't recognize it as an image
+            raise UnknownFormat
+        
+        current_page = 0
+        try:
+            while current_page == page - 1:
+                im.seek(im.tell() + 1)
+                current_page += 1
+                # do something to im
+        except EOFError:
+            pass # end of sequence        
+
+        if transformations:
+            for transformation in transformations:
+                aspect = 1.0 * im.size[1] / im.size[0]
+                if transformation['transformation'] == TRANSFORMATION_RESIZE:
+                    width = int(transformation['arguments']['width'])
+                    height = int(transformation['arguments'].get('height', 1.0 * width * aspect))
+                    im = im.resize((width, height), Image.ANTIALIAS)
+                elif transformation['transformation'] == TRANSFORMATION_ZOOM:
+                    decimal_value = float(transformation['arguments']['percent']) / 100
+                    im = im.transform((im.size[0] * decimal_value, im.size[1] * decimal_value), Image.EXTENT, (0, 0, im.size[0], im.size[1])) 
+                elif transformation['transformation'] == TRANSFORMATION_ROTATE:
+                    # PIL counts degrees counter-clockwise; reverse them
+                    im = im.rotate(360 - transformation['arguments']['degrees'])
+
+        if im.mode not in ('L', 'RGB'):
+            im = im.convert('RGB')
+        im.save(output_filepath, format=file_format)
+
+    def get_format_list(self):
+        """
+        Introspect PIL's internal registry to obtain a list of the
+        supported file types
+        """
+        formats = []
+        for format_name in Image.ID:
+            formats.append((format_name, u''))
+        
+        return formats
+
+    def get_available_transformations(self):
+        return [
+            TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE, \
+            TRANSFORMATION_ZOOM
+        ]
--- a/apps/converter/conf/settings.py
+++ b/apps/converter/conf/settings.py
@@ -9,12 +9,11 @@ register_settings(
    settings=[
        {'name': u'IM_CONVERT_PATH', 'global_name': u'CONVERTER_IM_CONVERT_PATH', 'default': u'/usr/bin/convert', 'description': _(u'File path to imagemagick\'s convert program.'), 'exists': True},
        {'name': u'IM_IDENTIFY_PATH', 'global_name': u'CONVERTER_IM_IDENTIFY_PATH', 'default': u'/usr/bin/identify', 'description': _(u'File path to imagemagick\'s identify program.'), 'exists': True},
-        {'name': u'UNPAPER_PATH', 'global_name': u'CONVERTER_UNPAPER_PATH', 'default': u'/usr/bin/unpaper', 'description': _(u'File path to unpaper program.'), 'exists': True},
        {'name': u'GM_PATH', 'global_name': u'CONVERTER_GM_PATH', 'default': u'/usr/bin/gm', 'description': _(u'File path to graphicsmagick\'s program.'), 'exists': True},
        {'name': u'GM_SETTINGS', 'global_name': u'CONVERTER_GM_SETTINGS', 'default': u''},
-        {'name': u'GRAPHICS_BACKEND', 'global_name': u'CONVERTER_GRAPHICS_BACKEND', 'default': u'converter.backends.imagemagick', 'description': _(u'Graphics conversion backend to use.  Options are: converter.backends.imagemagick and converter.backends.graphicsmagick.')},
+        {'name': u'GRAPHICS_BACKEND', 'global_name': u'CONVERTER_GRAPHICS_BACKEND', 'default': u'converter.backends.python', 'description': _(u'Graphics conversion backend to use.  Options are: converter.backends.imagemagick, converter.backends.graphicsmagick and converter.backends.python.')},
        {'name': u'UNOCONV_PATH', 'global_name': u'CONVERTER_UNOCONV_PATH', 'default': u'/usr/bin/unoconv', 'exists': True},
-        {'name': u'OCR_OPTIONS', 'global_name': u'CONVERTER_OCR_OPTIONS', 'default': u'-colorspace Gray -depth 8 -resample 200x200'},
+        #{'name': u'OCR_OPTIONS', 'global_name': u'CONVERTER_OCR_OPTIONS', 'default': u'-colorspace Gray -depth 8 -resample 200x200'},
        {'name': u'DEFAULT_OPTIONS', 'global_name': u'CONVERTER_DEFAULT_OPTIONS', 'default': u''},
        {'name': u'LOW_QUALITY_OPTIONS', 'global_name': u'CONVERTER_LOW_QUALITY_OPTIONS', 'default': u''},
        {'name': u'HIGH_QUALITY_OPTIONS', 'global_name': u'CONVERTER_HIGH_QUALITY_OPTIONS', 'default': u'-density 400'},
--- a/apps/converter/literals.py
+++ b/apps/converter/literals.py
@@ -0,0 +1,64 @@
+from django.utils.translation import ugettext_lazy as _
+
+from converter.conf.settings import DEFAULT_OPTIONS
+from converter.conf.settings import LOW_QUALITY_OPTIONS
+from converter.conf.settings import HIGH_QUALITY_OPTIONS
+from converter.conf.settings import PRINT_QUALITY_OPTIONS
+
+# Default values for the conversion parameters
+DEFAULT_ZOOM_LEVEL = 100
+DEFAULT_ROTATION = 0
+DEFAULT_PAGE_NUMBER = 1
+DEFAULT_FILE_FORMAT = u'jpeg'
+DEFAULT_OCR_FILE_FORMAT = u'tif'
+
+# Identifiers of the available quality levels
+QUALITY_DEFAULT = u'quality_default'
+QUALITY_LOW = u'quality_low'
+QUALITY_HIGH = u'quality_high'
+QUALITY_PRINT = u'quality_print'
+
+# Maps each quality identifier to the corresponding options value taken
+# from converter.conf.settings
+QUALITY_SETTINGS = {
+    QUALITY_DEFAULT: DEFAULT_OPTIONS,
+    QUALITY_LOW: LOW_QUALITY_OPTIONS,
+    QUALITY_HIGH: HIGH_QUALITY_OPTIONS,
+    QUALITY_PRINT: PRINT_QUALITY_OPTIONS
+}
+
+# NOTE(review): presumably separates width and height in size strings
+# such as u'640x480' -- confirm against the code that parses them
+DIMENSION_SEPARATOR = u'x'
+
+# Identifiers of the known transformations
+TRANSFORMATION_RESIZE = u'resize'
+TRANSFORMATION_ROTATE = u'rotate'
+TRANSFORMATION_DENSITY = u'density'
+TRANSFORMATION_ZOOM = u'zoom'
+
+# For every transformation identifier: a translatable label and
+# description plus the list of arguments it takes.  Each argument entry
+# carries its name, a translatable label and whether it is required.
+TRANSFORMATION_CHOICES = {
+    TRANSFORMATION_RESIZE: {
+        'label': _(u'Resize'),
+        'description': _(u'Resize.'),
+        'arguments': [
+            {'name': 'width', 'label': _(u'width'), 'required': True},
+            {'name': 'height', 'label': _(u'height'), 'required': False},
+        ]
+    },
+    TRANSFORMATION_ROTATE: {
+        'label': _(u'Rotate'),
+        'description': _(u'Rotate by n degress.'),
+        'arguments': [
+            {'name': 'degrees', 'label': _(u'degrees'), 'required': True}
+        ]
+    },
+    TRANSFORMATION_DENSITY: {
+        'label': _(u'Density'),
+        'description': _(u'Change the resolution (ie: DPI) without resizing.'),
+        'arguments': [
+            {'name': 'width', 'label': _(u'width'), 'required': True},
+            {'name': 'height', 'label': _(u'height'), 'required': False},
+        ]
+    }, 
+    TRANSFORMATION_ZOOM: {
+        'label': _(u'Zoom'),
+        'description': _(u'Zoom by n percent.'),
+        'arguments': [
+            {'name': 'percent', 'label': _(u'percent'), 'required': True}
+        ]
+    },        
+}
--- a/apps/converter/utils.py
+++ b/apps/converter/utils.py
@@ -1,6 +1,18 @@
+import os
+
+from django.core.exceptions import ImproperlyConfigured
+from django.utils.importlib import import_module
+
+try:
+    from python_magic import magic
+    USE_PYTHON_MAGIC = True
+except:
+    import mimetypes
+    mimetypes.init()
+    USE_PYTHON_MAGIC = False
+    
+    
 #http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python
-
-
 def copyfile(source, dest, buffer_size=1024 * 1024):
    """
    Copy a file from source to dest. source and dest
@@ -21,3 +33,79 @@ def copyfile(source, dest, buffer_size=1024 * 1024):

    source.close()
    dest.close()
+
+
+def _lazy_load(fn):
+    """
+    Decorator for argument-less callables: fn is executed on the first
+    call only and its result is handed back on every later call.
+    """
+    # One-element list used as the result slot; empty until fn has run
+    _result = []
+
+    def _decorated():
+        try:
+            return _result[0]
+        except IndexError:
+            # First call: nothing stored yet, compute and remember it
+            _result.append(fn())
+            return _result[0]
+    return _decorated
+
+    
+@_lazy_load
+def load_backend():
+    """
+    Import and return the ``base`` module of the graphics conversion
+    backend named by the GRAPHICS_BACKEND setting.
+
+    The name is first tried as a short name relative to
+    ``converter.backends`` (a PendingDeprecationWarning is issued when
+    that works) and then as a fully qualified package path.
+
+    Raises ImproperlyConfigured when neither import succeeds and the
+    name is not one of the directories found under ``backends``; any
+    other ImportError is re-raised unchanged.
+    """
+    from converter.conf.settings import GRAPHICS_BACKEND as backend_name
+
+    try:
+        module = import_module('.base', 'converter.backends.%s' % backend_name)
+        import warnings
+        # The warning names the setting as it is registered in
+        # converter.conf.settings (CONVERTER_GRAPHICS_BACKEND)
+        warnings.warn(
+            "Short names for CONVERTER_GRAPHICS_BACKEND are deprecated; prepend with 'converter.backends.'",
+            PendingDeprecationWarning
+        )
+        return module
+    except ImportError:
+        # Look for a fully qualified converter backend name
+        try:
+            return import_module('.base', backend_name)
+        except ImportError, e_user:
+            # The converter backend wasn't found. Display a helpful error message
+            # listing all possible (built-in) converter backends.
+            backend_dir = os.path.join(os.path.dirname(__file__), 'backends')
+            try:
+                available_backends = [f for f in os.listdir(backend_dir)
+                        if os.path.isdir(os.path.join(backend_dir, f))
+                        and not f.startswith('.')]
+            except EnvironmentError:
+                available_backends = []
+            available_backends.sort()
+            if backend_name not in available_backends:
+                error_msg = ("%r isn't an available converter backend. \n" +
+                    "Try using converter.backends.XXX, where XXX is one of:\n    %s\n" +
+                    "Error was: %s") % \
+                    (backend_name, ", ".join(map(repr, available_backends)), e_user)
+                raise ImproperlyConfigured(error_msg)
+            else:
+                raise # If there's some other error, this must be an error in Mayan itself.
+
+
+def get_mimetype(filepath):
+    """
+    Determine a file's mimetype by calling the system's libmagic
+    library via python-magic or fallback to use python's mimetypes
+    library
+
+    Returns a (mimetype, encoding) tuple.  With python-magic both
+    values are empty unicode strings when filepath does not exist; the
+    mimetypes fallback returns whatever mimetypes.guess_type() yields
+    for the file name.
+    """
+    file_mimetype = u''
+    file_mime_encoding = u''
+
+    if USE_PYTHON_MAGIC:
+        if os.path.exists(filepath):
+            # Open before entering the try block so that a failing open()
+            # surfaces its own error instead of a NameError in the cleanup.
+            # Binary mode: libmagic inspects the raw bytes.
+            source = open(filepath, 'rb')
+            try:
+                # Read once and reuse the buffer for both detections
+                data = source.read()
+            finally:
+                source.close()
+            mime = magic.Magic(mime=True)
+            file_mimetype = mime.from_buffer(data)
+            mime_encoding = magic.Magic(mime_encoding=True)
+            file_mime_encoding = mime_encoding.from_buffer(data)
+    else:
+        filename = os.path.basename(filepath)
+        file_mimetype, file_mime_encoding = mimetypes.guess_type(filename)
+
+    return file_mimetype, file_mime_encoding
+
--- a/apps/converter/views.py
+++ b/apps/converter/views.py
@@ -1,38 +1,18 @@
 from django.utils.translation import ugettext_lazy as _
 from django.shortcuts import render_to_response
 from django.template import RequestContext
-from django.utils.importlib import import_module
+
+from converter import backend

 from converter.conf.settings import GRAPHICS_BACKEND

-
-def _lazy_load(fn):
-    _cached = []
-
-    def _decorated():
-        if not _cached:
-            _cached.append(fn())
-        return _cached[0]
-    return _decorated
-
-
-@_lazy_load
-def _get_backend():
-    return import_module(GRAPHICS_BACKEND)
-
-try:
-    backend = _get_backend()
-except ImportError:
-    raise ImportError(u'Missing or incorrect converter backend: %s' % GRAPHICS_BACKEND)
-
-
 def formats_list(request):
    #check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW])

    context = {
        'title': _(u'suported file formats'),
        'hide_object': True,
-        'object_list': backend.get_format_list(),
+        'object_list': sorted(backend.get_format_list()),
        'extra_columns': [
            {
                'name': _(u'name'),
--- a/apps/documents/conf/settings.py
+++ b/apps/documents/conf/settings.py
@@ -18,10 +18,6 @@ def default_uuid():
    """unicode(uuid.uuid4())"""
    return unicode(uuid.uuid4())

-available_transformations = {
-    'rotate': {'label': _(u'Rotate [degrees]'), 'arguments': [{'name': 'degrees'}]}
-}
-
 register_settings(
    namespace=u'documents',
    module=u'documents.conf.settings',
@@ -31,8 +27,6 @@ register_settings(
        {'name': u'UUID_FUNCTION', 'global_name': u'DOCUMENTS_UUID_FUNCTION', 'default': default_uuid},
        # Storage
        {'name': u'STORAGE_BACKEND', 'global_name': u'DOCUMENTS_STORAGE_BACKEND', 'default': FileBasedStorage},
-        # Transformations
-        {'name': u'AVAILABLE_TRANSFORMATIONS', 'global_name': u'DOCUMENTS_AVAILABLE_TRANSFORMATIONS', 'default': available_transformations},
        # Usage
        {'name': u'PREVIEW_SIZE', 'global_name': u'DOCUMENTS_PREVIEW_SIZE', 'default': u'640x480'},
        {'name': u'PRINT_SIZE', 'global_name': u'DOCUMENTS_PRINT_SIZE', 'default': u'1400'},
--- a/apps/documents/managers.py
+++ b/apps/documents/managers.py
@@ -13,3 +13,24 @@ class RecentDocumentManager(models.Manager):
        to_delete = self.model.objects.filter(user=user)[RECENT_COUNT:]
        for recent_to_delete in to_delete:
            recent_to_delete.delete()
+
+
+class DocumentPageTransformationManager(models.Manager):
+    """
+    Manager with helpers to retrieve the transformations stored for a
+    document page.
+    """
+    def get_for_document_page(self, document_page):
+        """
+        Return the queryset of transformations attached to document_page
+        """
+        queryset = self.model.objects.filter(document_page=document_page)
+        return queryset
+
+    def get_for_document_page_as_list(self, document_page):
+        """
+        Return a (transformations, warnings) tuple for document_page.
+
+        transformations is a list of dictionaries with the keys
+        'transformation' and 'arguments' (the evaluated arguments
+        field); warnings collects the exception raised by every entry
+        whose arguments could not be evaluated, such entries are left
+        out of the list.
+        """
+        parsed = []
+        errors = []
+        rows = self.get_for_document_page(document_page).values('transformation', 'arguments')
+        for row in rows:
+            try:
+                # NOTE(review): eval() executes whatever expression is
+                # stored in the arguments field; worth restricting to
+                # literals if that field can hold untrusted input
+                arguments = eval(row['arguments'], {})
+            except Exception, e:
+                errors.append(e)
+            else:
+                parsed.append(
+                    {
+                        'transformation': row['transformation'],
+                        'arguments': arguments
+                    }
+                )
+
+        return parsed, errors
--- a/apps/documents/models.py
+++ b/apps/documents/models.py
@@ -12,15 +12,13 @@ from python_magic import magic
 from taggit.managers import TaggableManager
 from dynamic_search.api import register
 from converter.api import get_page_count
-from converter import TRANFORMATION_CHOICES
+from converter.api import get_available_transformations_choices

 from documents.conf.settings import CHECKSUM_FUNCTION
 from documents.conf.settings import UUID_FUNCTION
 from documents.conf.settings import STORAGE_BACKEND
-from documents.conf.settings import AVAILABLE_TRANSFORMATIONS
-from documents.managers import RecentDocumentManager
-
-available_transformations = ([(name, data['label']) for name, data in AVAILABLE_TRANSFORMATIONS.items()])
+from documents.managers import RecentDocumentManager, \
+    DocumentPageTransformationManager


 def get_filename_from_uuid(instance, filename):
@@ -92,7 +90,7 @@ class Document(models.Model):
        mimetype, page count and transformation when originally created
        """
        new_document = not self.pk
-
+        transformations = kwargs.pop('transformations', None)
        super(Document, self).save(*args, **kwargs)

        if new_document:
@@ -101,7 +99,8 @@ class Document(models.Model):
            self.update_mimetype(save=False)
            self.save()
            self.update_page_count(save=False)
-            self.apply_default_transformations()
+            if transformations:
+                self.apply_default_transformations(transformations)

    @models.permalink
    def get_absolute_url(self):
@@ -202,21 +201,21 @@ class Document(models.Model):
        exists in storage
        """
        return self.file.storage.exists(self.file.path)
+    

-    def apply_default_transformations(self):
+    def apply_default_transformations(self, transformations):
+        """
+        Create a DocumentPageTransformation, with order 0, for every
+        entry of transformations on every page of this document.
+
+        transformations is an iterable of dictionaries read through the
+        'transformation' and 'arguments' keys.  Nothing is created when
+        any page of the document already has a transformation.
+        """
        #Only apply default transformations on new documents
-        if DEFAULT_TRANSFORMATIONS and reduce(lambda x, y: x + y, [page.documentpagetransformation_set.count() for page in self.documentpage_set.all()]) == 0:
-            for transformation in DEFAULT_TRANSFORMATIONS:
-                if 'name' in transformation:
-                    for document_page in self.documentpage_set.all():
-                        page_transformation = DocumentPageTransformation(
-                            document_page=document_page,
-                            order=0,
-                            transformation=transformation['name'])
-                        if 'arguments' in transformation:
-                            page_transformation.arguments = transformation['arguments']
+        # sum() copes with a document that has no pages; reduce() without
+        # an initial value raises TypeError on an empty sequence
+        if sum([page.documentpagetransformation_set.count() for page in self.documentpage_set.all()]) == 0:
+            for transformation in transformations:
+                for document_page in self.documentpage_set.all():
+                    page_transformation = DocumentPageTransformation(
+                        document_page=document_page,
+                        order=0,
+                        transformation=transformation.get('transformation'),
+                        arguments=transformation.get('arguments')
+                    )

-                        page_transformation.save()
+                    page_transformation.save()


 class DocumentTypeFilename(models.Model):
@@ -258,26 +257,13 @@ class DocumentPage(models.Model):
        verbose_name = _(u'document page')
        verbose_name_plural = _(u'document pages')

+    def get_transformation_list(self):
+        """
+        Return the (transformations, warnings) tuple produced by the
+        DocumentPageTransformation manager for this page.
+        """
+        manager = DocumentPageTransformation.objects
+        return manager.get_for_document_page_as_list(self)
+
    @models.permalink
    def get_absolute_url(self):
        return ('document_page_view', [self.pk])

-    def get_transformation_string(self):
-        transformation_list = []
-        warnings = []
-        for page_transformation in self.documentpagetransformation_set.all():
-            try:
-                if page_transformation.transformation in TRANFORMATION_CHOICES:
-                    transformation_list.append(
-                        TRANFORMATION_CHOICES[page_transformation.transformation] % eval(
-                            page_transformation.arguments
-                        )
-                    )
-            except Exception, e:
-                warnings.append(e)
-
-        return u' '.join(transformation_list), warnings
-

 class DocumentPageTransformation(models.Model):
    """
@@ -286,9 +272,11 @@ class DocumentPageTransformation(models.Model):
    """
    document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page'))
    order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_(u'order'), db_index=True)
-    transformation = models.CharField(choices=available_transformations, max_length=128, verbose_name=_(u'transformation'))
+    transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_(u'transformation'))
    arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: {\'degrees\':90}'))

+    objects = DocumentPageTransformationManager()
+
    def __unicode__(self):
        return u'"%s" for %s' % (self.get_transformation_display(), unicode(self.document_page))

--- a/apps/documents/urls.py
+++ b/apps/documents/urls.py
@@ -1,14 +1,12 @@
 from django.conf.urls.defaults import patterns, url

-from converter.api import QUALITY_HIGH, QUALITY_PRINT
+from converter.literals import QUALITY_HIGH, QUALITY_PRINT

 from documents.conf.settings import PREVIEW_SIZE
 from documents.conf.settings import PRINT_SIZE
 from documents.conf.settings import THUMBNAIL_SIZE
 from documents.conf.settings import DISPLAY_SIZE
 from documents.conf.settings import MULTIPAGE_PREVIEW_SIZE
-#from documents.literals import UPLOAD_SOURCE_LOCAL, \
-#    UPLOAD_SOURCE_STAGING, UPLOAD_SOURCE_USER_STAGING

 urlpatterns = patterns('documents.views',
    url(r'^list/$', 'document_list', (), 'document_list'),
--- a/apps/documents/views.py
+++ b/apps/documents/views.py
@@ -20,10 +20,11 @@ from common.widgets import two_state_template
 from common.literals import PAGE_SIZE_DIMENSIONS, \
    PAGE_ORIENTATION_PORTRAIT, PAGE_ORIENTATION_LANDSCAPE
 from common.conf.settings import DEFAULT_PAPER_SIZE
-from converter.api import convert_document, QUALITY_DEFAULT
+from converter.api import convert_document
 from converter.exceptions import UnkownConvertError, UnknownFormat
-from converter.api import DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \
-    DEFAULT_FILE_FORMAT, QUALITY_PRINT
+from converter.literals import DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \
+    DEFAULT_FILE_FORMAT, QUALITY_PRINT, QUALITY_DEFAULT, \
+    DEFAULT_PAGE_NUMBER
 from filetransfers.api import serve_file
 from grouping.utils import get_document_group_subtemplate
 from metadata.api import save_metadata_list, \
@@ -286,38 +287,14 @@ def document_edit(request, document_id):
    }, context_instance=RequestContext(request))


-def calculate_converter_arguments(document, *args, **kwargs):
-    size = kwargs.pop('size', PREVIEW_SIZE)
-    quality = kwargs.pop('quality', QUALITY_DEFAULT)
-    page = kwargs.pop('page', 1)
-    file_format = kwargs.pop('file_format', DEFAULT_FILE_FORMAT)
-    zoom = kwargs.pop('zoom', DEFAULT_ZOOM_LEVEL)
-    rotation = kwargs.pop('rotation', DEFAULT_ROTATION)
-
-    document_page = DocumentPage.objects.get(document=document, page_number=page)
-    transformation_string, warnings = document_page.get_transformation_string()
-
-    arguments = {
-        'size': size,
-        'file_format': file_format,
-        'quality': quality,
-        'extra_options': transformation_string,
-        'page': page - 1,
-        'zoom': zoom,
-        'rotation': rotation
-    }
-
-    return arguments, warnings
-
-
 def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_DEFAULT):
    check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW])

    document = get_object_or_404(Document, pk=document_id)

-    page = int(request.GET.get('page', 1))
+    page = int(request.GET.get('page', DEFAULT_PAGE_NUMBER))

-    zoom = int(request.GET.get('zoom', 100))
+    zoom = int(request.GET.get('zoom', DEFAULT_ZOOM_LEVEL))

    if zoom < ZOOM_MIN_LEVEL:
        zoom = ZOOM_MIN_LEVEL
@@ -325,16 +302,17 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
    if zoom > ZOOM_MAX_LEVEL:
        zoom = ZOOM_MAX_LEVEL

-    rotation = int(request.GET.get('rotation', 0)) % 360
+    rotation = int(request.GET.get('rotation', DEFAULT_ROTATION)) % 360

-    arguments, warnings = calculate_converter_arguments(document, size=size, file_format=DEFAULT_FILE_FORMAT, quality=quality, page=page, zoom=zoom, rotation=rotation)
+    document_page = get_object_or_404(document.documentpage_set, page_number=page)
+    transformations, warnings = document_page.get_transformation_list()

    if warnings and (request.user.is_staff or request.user.is_superuser):
        for warning in warnings:
            messages.warning(request, _(u'Page transformation error: %s') % warning)
-
+            
    try:
-        output_file = convert_document(document, **arguments)
+        output_file = convert_document(document, size=size, file_format=DEFAULT_FILE_FORMAT, quality=quality, page=page, zoom=zoom, rotation=rotation, transformations=transformations)
    except UnkownConvertError, e:
        if request.user.is_staff or request.user.is_superuser:
            messages.error(request, e)
@@ -592,13 +570,13 @@ def document_page_view(request, document_page_id):

    document_page = get_object_or_404(DocumentPage, pk=document_page_id)

-    zoom = int(request.GET.get('zoom', 100))
-    rotation = int(request.GET.get('rotation', 0))
+    zoom = int(request.GET.get('zoom', DEFAULT_ZOOM_LEVEL))
+    rotation = int(request.GET.get('rotation', DEFAULT_ROTATION))
    document_page_form = DocumentPageForm(instance=document_page, zoom=zoom, rotation=rotation)

    base_title = _(u'details for: %s') % document_page

-    if zoom != 100:
+    if zoom != DEFAULT_ZOOM_LEVEL:
        zoom_text = u'(%d%%)' % zoom
    else:
        zoom_text = u''
--- a/apps/ocr/init.py
+++ b/apps/ocr/init.py
@@ -9,7 +9,7 @@ from documents.models import Document
 from main.api import register_tool

 from ocr.conf.settings import AUTOMATIC_OCR
-from ocr.models import DocumentQueue
+from ocr.models import DocumentQueue, QueueTransformation

 #Permissions
 PERMISSION_OCR_DOCUMENT = {'namespace': 'ocr', 'name': 'ocr_document', 'label': _(u'Submit document for OCR')}
@@ -30,20 +30,27 @@ re_queue_multiple_document = {'text': _('re-queue'), 'view': 're_queue_multiple_
 queue_document_delete = {'text': _(u'delete'), 'view': 'queue_document_delete', 'args': 'object.id', 'famfam': 'hourglass_delete', 'permissions': [PERMISSION_OCR_DOCUMENT_DELETE]}
 queue_document_multiple_delete = {'text': _(u'delete'), 'view': 'queue_document_multiple_delete', 'famfam': 'hourglass_delete', 'permissions': [PERMISSION_OCR_DOCUMENT_DELETE]}

-document_queue_disable = {'text': _(u'stop queue'), 'view': 'document_queue_disable', 'args': 'object.id', 'famfam': 'control_stop_blue', 'permissions': [PERMISSION_OCR_QUEUE_ENABLE_DISABLE]}
-document_queue_enable = {'text': _(u'activate queue'), 'view': 'document_queue_enable', 'args': 'object.id', 'famfam': 'control_play_blue', 'permissions': [PERMISSION_OCR_QUEUE_ENABLE_DISABLE]}
+document_queue_disable = {'text': _(u'stop queue'), 'view': 'document_queue_disable', 'args': 'queue.id', 'famfam': 'control_stop_blue', 'permissions': [PERMISSION_OCR_QUEUE_ENABLE_DISABLE]}
+document_queue_enable = {'text': _(u'activate queue'), 'view': 'document_queue_enable', 'args': 'queue.id', 'famfam': 'control_play_blue', 'permissions': [PERMISSION_OCR_QUEUE_ENABLE_DISABLE]}

 all_document_ocr_cleanup = {'text': _(u'clean up pages content'), 'view': 'all_document_ocr_cleanup', 'famfam': 'text_strikethrough', 'permissions': [PERMISSION_OCR_CLEAN_ALL_PAGES], 'description': _(u'Runs a language filter to remove common OCR mistakes from document pages content.')}

 queue_document_list = {'text': _(u'queue document list'), 'view': 'queue_document_list', 'famfam': 'hourglass', 'permissions': [PERMISSION_OCR_DOCUMENT]}
 node_active_list = {'text': _(u'active tasks'), 'view': 'node_active_list', 'famfam': 'server_chart', 'permissions': [PERMISSION_OCR_DOCUMENT]}

+setup_queue_transformation_list = {'text': _(u'transformations'), 'view': 'setup_queue_transformation_list', 'args': 'queue.pk', 'famfam': 'shape_move_front'}
+setup_queue_transformation_create = {'text': _(u'add transformation'), 'view': 'setup_queue_transformation_create', 'args': 'queue.pk', 'famfam': 'shape_square_add'}
+setup_queue_transformation_edit = {'text': _(u'edit'), 'view': 'setup_queue_transformation_edit', 'args': 'transformation.pk', 'famfam': 'shape_square_edit'}
+setup_queue_transformation_delete = {'text': _(u'delete'), 'view': 'setup_queue_transformation_delete', 'args': 'transformation.pk', 'famfam': 'shape_square_delete'}
+
 register_links(Document, [submit_document])
-register_links(DocumentQueue, [document_queue_disable, document_queue_enable])
+register_links(DocumentQueue, [document_queue_disable, document_queue_enable, setup_queue_transformation_list])
+register_links(QueueTransformation, [setup_queue_transformation_edit, setup_queue_transformation_delete])

 register_multi_item_links(['queue_document_list'], [re_queue_multiple_document, queue_document_multiple_delete])

-register_links(['queue_document_list', 'node_active_list'], [queue_document_list, node_active_list], menu_name='secondary_menu')
+register_links(['setup_queue_transformation_create', 'setup_queue_transformation_edit', 'setup_queue_transformation_delete', 'document_queue_disable', 'document_queue_enable', 'queue_document_list', 'node_active_list', 'setup_queue_transformation_list'], [queue_document_list, node_active_list], menu_name='secondary_menu')
+register_links(['setup_queue_transformation_edit', 'setup_queue_transformation_delete', 'setup_queue_transformation_list', 'setup_queue_transformation_create'], [setup_queue_transformation_create], menu_name='sidebar')


 register_tool(all_document_ocr_cleanup, namespace='ocr', title=_(u'OCR'))
--- a/apps/ocr/api.py
+++ b/apps/ocr/api.py
@@ -9,13 +9,15 @@ import sys
 from django.utils.translation import ugettext as _
 from django.utils.importlib import import_module

-from converter.api import convert_document_for_ocr
+from converter.api import convert
 from documents.models import DocumentPage

 from ocr.conf.settings import TESSERACT_PATH
 from ocr.conf.settings import TESSERACT_LANGUAGE
-from ocr.conf.settings import PDFTOTEXT_PATH
-from ocr.exceptions import TesseractError, PdftotextError
+from ocr.exceptions import TesseractError
+from ocr.conf.settings import UNPAPER_PATH
+from ocr.parsers import parse_document_page
+from ocr.parsers.exceptions import ParserError, ParserUnknownFile


 def get_language_backend():
@@ -30,7 +32,7 @@ def get_language_backend():
        return None
    return module

-backend = get_language_backend()
+language_backend = get_language_backend()


 def cleanup(filename):
@@ -58,63 +60,38 @@ def run_tesseract(input_filename, output_filename_base, lang=None):
        raise TesseractError(error_text)


-def run_pdftotext(input_filename, output_filename, page_number=None):
-    """
-        Execute the command line binary of pdftotext
-    """
-    command = [unicode(PDFTOTEXT_PATH)]
-    if page_number:
-        command.extend([u'-nopgbrk', u'-f', unicode(page_number), u'-l', unicode(page_number)])
-    command.extend([unicode(input_filename), unicode(output_filename)])
-    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
-    return_code = proc.wait()
-    if return_code != 0:
-        error_text = proc.stderr.read()
-        raise PdftotextError(error_text)
-
-
 def do_document_ocr(document):
    """
-    Do OCR on all the pages of the given document object, first
-    trying to extract text from PDF using pdftotext then by calling
-    tesseract
+    first try to extract text from document pages using the registered
+    parser if the parser fails or if there is no parser registered for
+    the document mimetype do a visual OCR by calling tesseract
    """
-    for page_index, document_page in enumerate(document.documentpage_set.all()):
-        desc, filepath = tempfile.mkstemp()
-        imagefile = None
-        source = u''
+    for document_page in document.documentpage_set.all():
        try:
-            if document.file_mimetype == u'application/pdf':
-                pdf_filename = os.extsep.join([filepath, u'pdf'])
-                document.save_to_file(pdf_filename)
-                run_pdftotext(pdf_filename, filepath, document_page.page_number)
-                cleanup(pdf_filename)
-                if os.stat(filepath).st_size == 0:
-                    #PDF page had no text, run tesseract on the page
-                    imagefile = convert_document_for_ocr(document, page=page_index)
-                    run_tesseract(imagefile, filepath, TESSERACT_LANGUAGE)
-                    ocr_output = os.extsep.join([filepath, u'txt'])
-                    source = _(u'Text from OCR')
-                else:
-                    ocr_output = filepath
-                    source = _(u'Text extracted from PDF')
-            else:
-                imagefile = convert_document_for_ocr(document, page=page_index)
-                run_tesseract(imagefile, filepath, TESSERACT_LANGUAGE)
-                ocr_output = os.extsep.join([filepath, u'txt'])
-                source = _(u'Text from OCR')
-            f = codecs.open(ocr_output, 'r', 'utf-8')
-            document_page = document.documentpage_set.get(page_number=page_index + 1)
-            document_page.content = ocr_cleanup(f.read().strip())
-            document_page.page_label = source
-            document_page.save()
-            f.close()
-            cleanup(ocr_output)
-        finally:
-            os.close(desc)
-            cleanup(filepath)
-            if imagefile:
-                cleanup(imagefile)
+            # Try to extract text by means of a parser
+            parse_document_page(document_page)
+        except (ParserError, ParserUnknownFile):
+            # Fall back to doing visual OCR
+            pass
+            #desc, filepath = tempfile.mkstemp()
+            #imagefile = None
+            #source = u''
+            #imagefile = convert_document_for_ocr(document, page=document_page.page_number)
+            #run_tesseract(imagefile, filepath, TESSERACT_LANGUAGE)
+            #ocr_output = os.extsep.join([filepath, u'txt'])
+            #source = _(u'Text from OCR')
+            #f = codecs.open(ocr_output, 'r', 'utf-8')
+            #document_page.content = ocr_cleanup(f.read().strip())
+            #document_page.page_label = source
+            #document_page.save()
+            #f.close()
+            #cleanup(ocr_output)
+        #finally:
+        #    pass
+            #os.close(desc)
+            #cleanup(filepath)
+            #if imagefile:
+            #    cleanup(imagefile)


 def ocr_cleanup(text):
@@ -127,8 +104,8 @@ def ocr_cleanup(text):
    for line in text.splitlines():
        line = line.strip()
        for word in line.split():
-            if backend:
-                result = backend.check_word(word)
+            if language_backend:
+                result = language_backend.check_word(word)
            else:
                result = word
            if result:
@@ -147,3 +124,53 @@ def clean_pages():
        if page.content:
            page.content = ocr_cleanup(page.content)
            page.save()
+
+
+def execute_unpaper(input_filepath, output_filepath):
+    """
+    Executes the program unpaper using subprocess's Popen
+
+    unpaper is called with --overwrite, input_filepath and
+    output_filepath.  Raises UnpaperError carrying the first line of
+    the program's stderr output when it exits with a non zero code.
+    """
+    # NOTE(review): UnpaperError is not among the imports visible in this
+    # module; confirm it is in scope, otherwise the raise below turns
+    # into a NameError
+    command = [UNPAPER_PATH, u'--overwrite', input_filepath, output_filepath]
+    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE)
+    # communicate() drains stderr while waiting for the process; wait()
+    # alone can block forever once the child fills the pipe buffer
+    stderr_output = proc.communicate()[1]
+    if proc.returncode != 0:
+        # Keep only the first line, newline included, as readline() would
+        first_line = stderr_output[:stderr_output.find('\n') + 1] or stderr_output
+        raise UnpaperError(first_line)
+
+'''
+def convert_document_for_ocr(document, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_OCR_FILE_FORMAT):
+    #Extract document file
+    input_filepath = document_save_to_temp_dir(document, document.uuid)
+
+    #Convert for OCR
+    temp_filename, separator = os.path.splitext(os.path.basename(input_filepath))
+    temp_path = os.path.join(TEMPORARY_DIRECTORY, temp_filename)
+    transformation_output_file = u'%s_trans%s%s%s' % (temp_path, page, os.extsep, file_format)
+    unpaper_input_file = u'%s_unpaper_in%s%spnm' % (temp_path, page, os.extsep)
+    unpaper_output_file = u'%s_unpaper_out%s%spnm' % (temp_path, page, os.extsep)
+    convert_output_file = u'%s_ocr%s%s%s' % (temp_path, page, os.extsep, file_format)
+
+    try:
+        document_page = document.documentpage_set.get(page_number=page)
+        transformations, warnings = document_page.get_transformation_list()
+
+        #Apply default transformations
+        backend.convert_file(input_filepath=input_filepath, page=page, quality=QUALITY_HIGH, transformations=transformations, output_filepath=transformation_output_file)
+        #Do OCR operations
+        backend.convert_file(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file)
+        # Process by unpaper
+        execute_unpaper(input_filepath=unpaper_input_file, output_filepath=unpaper_output_file)
+        # Convert to tif
+        backend.convert_file(input_filepath=unpaper_output_file, output_filepath=convert_output_file)
+    finally:
+        cleanup(transformation_output_file)
+        cleanup(unpaper_input_file)
+        cleanup(unpaper_output_file)
+
+    return convert_output_file
+'''
+
+
--- a/apps/ocr/conf/settings.py
+++ b/apps/ocr/conf/settings.py
@@ -13,8 +13,9 @@ register_settings(
        {'name': u'REPLICATION_DELAY', 'global_name': u'OCR_REPLICATION_DELAY', 'default': 10, 'description': _(u'Amount of seconds to delay OCR of documents to allow for the node\'s storage replication overhead.')},
        {'name': u'NODE_CONCURRENT_EXECUTION', 'global_name': u'OCR_NODE_CONCURRENT_EXECUTION', 'default': 1, 'description': _(u'Maximum amount of concurrent document OCRs a node can perform.')},
        {'name': u'AUTOMATIC_OCR', 'global_name': u'OCR_AUTOMATIC_OCR', 'default': False, 'description': _(u'Automatically queue newly created documents for OCR.')},
-        {'name': u'PDFTOTEXT_PATH', 'global_name': u'OCR_PDFTOTEXT_PATH', 'default': u'/usr/bin/pdftotext', 'exists': True},
        {'name': u'QUEUE_PROCESSING_INTERVAL', 'global_name': u'OCR_QUEUE_PROCESSING_INTERVAL', 'default': 10},
-        {'name': u'CACHE_URI', 'global_name': u'OCR_CACHE_URI', 'default': None, 'description': _(u'URI in the form: "memcached://127.0.0.1:11211/" to specify a cache backend to use for locking. Multiple hosts can be specified separated by a semicolon.')}
+        {'name': u'CACHE_URI', 'global_name': u'OCR_CACHE_URI', 'default': None, 'description': _(u'URI in the form: "memcached://127.0.0.1:11211/" to specify a cache backend to use for locking. Multiple hosts can be specified separated by a semicolon.')},
+        {'name': u'UNPAPER_PATH', 'global_name': u'OCR_UNPAPER_PATH', 'default': u'/usr/bin/unpaper', 'description': _(u'File path to unpaper program.'), 'exists': True},
+        {'name': u'PARSERS_PDFTOTEXT_PATH', 'global_name': u'OCR_PARSERS_PDFTOTEXT_PATH', 'default': u'/usr/bin/pdftotext', 'exists': True},
    ]
 )
--- a/apps/ocr/exceptions.py
+++ b/apps/ocr/exceptions.py
@@ -4,7 +4,3 @@ class AlreadyQueued(Exception):

 class TesseractError(Exception):
    pass
-
-
-class PdftotextError(Exception):
-    pass
--- a/apps/ocr/forms.py
+++ b/apps/ocr/forms.py
@@ -0,0 +1,21 @@
+from django import forms
+from django.utils.translation import ugettext_lazy as _
+from django.utils.translation import ugettext
+
+from ocr.models import QueueTransformation
+
+
+class QueueTransformationForm(forms.ModelForm):
+    """
+    Model form used to edit an existing queue transformation; the
+    generic relation fields stay in the form but are rendered hidden
+    """
+    class Meta:
+        model = QueueTransformation
+
+    def __init__(self, *args, **kwargs):
+        super(QueueTransformationForm, self).__init__(*args, **kwargs)
+        # Both halves of the generic foreign key get a hidden widget
+        for field_name in ('content_type', 'object_id'):
+            self.fields[field_name].widget = forms.HiddenInput()
+
+
+class QueueTransformationForm_create(forms.ModelForm):
+    """
+    Model form used to create a queue transformation; the generic
+    relation fields are left out of the form entirely
+    """
+    class Meta:
+        exclude = ('content_type', 'object_id')
+        model = QueueTransformation
--- a/apps/ocr/manager.py
+++ b/apps/ocr/manager.py
@@ -1,18 +0,0 @@
-from django.db import models
-
-from ocr.exceptions import AlreadyQueued
-
-
-class DocumentQueueManager(models.Manager):
-    """
-    Module manager class to handle adding documents to an OCR document
-    queue
-    """
-    def queue_document(self, document, queue_name='default'):
-        document_queue = self.model.objects.get(name=queue_name)
-        if document_queue.queuedocument_set.filter(document=document):
-            raise AlreadyQueued
-
-        document_queue.queuedocument_set.create(document=document, delay=True)
-
-        return document_queue
--- a/apps/ocr/managers.py
+++ b/apps/ocr/managers.py
@@ -0,0 +1,41 @@
+from django.db import models
+from django.contrib.contenttypes.models import ContentType
+
+from ocr.exceptions import AlreadyQueued
+
+
+class DocumentQueueManager(models.Manager):
+    """
+    Module manager class to handle adding documents to an OCR document
+    queue
+    """
+    def queue_document(self, document, queue_name='default'):
+        """
+        Add a document to the queue named queue_name and return that
+        queue
+
+        Raises AlreadyQueued when the queue already holds an entry for
+        the document; the queue lookup itself is a plain get(), so a
+        missing queue name surfaces as the model's DoesNotExist
+        """
+        document_queue = self.model.objects.get(name=queue_name)
+        # exists() lets the database answer the membership question
+        # instead of fetching and instantiating every matching entry
+        # just to test the queryset for truthiness
+        if document_queue.queuedocument_set.filter(document=document).exists():
+            raise AlreadyQueued
+
+        # New queue entries are created with delay=True
+        document_queue.queuedocument_set.create(document=document, delay=True)
+
+        return document_queue
+
+
+class QueueTransformationManager(models.Manager):
+    """
+    Manager with helpers to fetch the transformations attached to a
+    model instance through the generic relation
+    """
+    def get_for_object(self, obj):
+        """
+        Return the queryset of transformations whose generic relation
+        points to obj
+        """
+        return self.model.objects.filter(
+            content_type=ContentType.objects.get_for_model(obj),
+            object_id=obj.pk
+        )
+
+    def get_for_object_as_list(self, obj):
+        """
+        Return a (transformations, warnings) tuple for obj
+
+        transformations is a list of dictionaries with the keys
+        'transformation' and 'arguments' (the evaluated arguments text);
+        warnings holds the exceptions raised by the entries whose
+        arguments could not be evaluated, those entries are left out
+        """
+        parsed = []
+        failures = []
+        rows = self.get_for_object(obj).values('transformation', 'arguments')
+        for row in rows:
+            try:
+                # NOTE(review): eval() executes whatever expression is
+                # stored in the arguments field; passing an empty globals
+                # dict does not remove access to the builtins
+                evaluated = eval(row['arguments'], {})
+            except Exception, e:
+                failures.append(e)
+            else:
+                parsed.append({
+                    'transformation': row['transformation'],
+                    'arguments': evaluated
+                })
+
+        return parsed, failures
--- a/apps/ocr/models.py
+++ b/apps/ocr/models.py
@@ -2,13 +2,16 @@ from django.db import models
 from django.utils.translation import ugettext_lazy as _
 from django.utils.translation import ugettext
 from django.core.exceptions import ObjectDoesNotExist
+from django.contrib.contenttypes.models import ContentType
+from django.contrib.contenttypes import generic

 from documents.models import Document
+from converter.api import get_available_transformations_choices

 from ocr.literals import DOCUMENTQUEUE_STATE_STOPPED, \
    DOCUMENTQUEUE_STATE_CHOICES, QUEUEDOCUMENT_STATE_PENDING, \
    QUEUEDOCUMENT_STATE_CHOICES
-from ocr.manager import DocumentQueueManager
+from ocr.managers import DocumentQueueManager, QueueTransformationManager


 class DocumentQueue(models.Model):
@@ -51,3 +54,26 @@ class QueueDocument(models.Model):
            return unicode(self.document)
        except ObjectDoesNotExist:
            return ugettext(u'Missing document.')
+
+
+class QueueTransformation(models.Model):
+    """
+    Model that stores the transformation and transformation arguments
+    for a given document queue
+    """
+    # content_type and object_id are the two halves of the generic
+    # foreign key exposed as content_object
+    content_type = models.ForeignKey(ContentType)
+    object_id = models.PositiveIntegerField()
+    content_object = generic.GenericForeignKey('content_type', 'object_id')
+    # Sort key used by Meta.ordering below
+    order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_(u'order'), db_index=True)
+    # Restricted to the choices reported by the converter app; the choices
+    # are computed once, when this class body is executed
+    transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_(u'transformation'))
+    # Stored as text; QueueTransformationManager.get_for_object_as_list
+    # passes this value through eval() to obtain the arguments
+    arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: %s') % u'{\'degrees\':90}')
+
+    objects = QueueTransformationManager()
+
+    def __unicode__(self):
+        # Display label of the selected transformation choice
+        return self.get_transformation_display()
+
+    class Meta:
+        ordering = ('order',)
+        verbose_name = _(u'document queue transformation')
+        verbose_name_plural = _(u'document queue transformations')
--- a/apps/ocr/parsers/init.py
+++ b/apps/ocr/parsers/init.py
@@ -0,0 +1,40 @@
+import codecs
+import os
+import subprocess
+import tempfile
+import sys
+
+import slate
+
+from django.utils.translation import ugettext as _
+
+from ocr.parsers.exceptions import ParserError, ParserUnknownFile
+
+mimetype_registry = {}
+
+
+def register_parser(mimetype, function):
+    """
+    Associate a parser callable with a MIME type, replacing any parser
+    already registered for that same MIME type
+    """
+    mimetype_registry.update({mimetype: {'function': function}})
+
+
+def pdf_parser(document_page):
+    """
+    Extract the text of a single PDF page with slate and store it in
+    the document page
+
+    Raises ParserError when the text of the page consists of a lone
+    form feed character (presumably a page without extractable text)
+    """
+    fd = document_page.document.open()
+    try:
+        pdf_pages = slate.PDF(fd)
+    finally:
+        # Release the file even when slate fails to parse the PDF
+        fd.close()
+
+    # page_number is 1 based while the list of pages is 0 based
+    page_text = pdf_pages[document_page.page_number - 1]
+    if page_text == '\x0c':
+        raise ParserError
+
+    document_page.content = page_text
+    document_page.page_label = _(u'Text extracted from PDF')
+    document_page.save()
+       
+
+def parse_document_page(document_page):
+    """
+    Run the parser registered for the MIME type of the page's document
+
+    Raises ParserUnknownFile when no parser is registered for that MIME
+    type; exceptions raised by the parser itself are not altered
+    """
+    try:
+        parser = mimetype_registry[document_page.document.file_mimetype]
+    except KeyError:
+        raise ParserUnknownFile
+
+    # Called outside of the try block so that a KeyError raised inside a
+    # parser is not mistaken for an unregistered MIME type
+    parser['function'](document_page)
+
+        
+register_parser('application/pdf', pdf_parser)
--- a/apps/ocr/parsers/exceptions.py
+++ b/apps/ocr/parsers/exceptions.py
@@ -0,0 +1,10 @@
+class ParserError(Exception):
+    """
+    Raised when a text parser fails to understand the file it has been
+    passed or when the resulting parsed text is invalid
+    """
+
+
+class ParserUnknownFile(Exception):
+    """
+    Raised by parse_document_page when no parser is registered for the
+    MIME type of the document
+    """
--- a/apps/ocr/urls.py
+++ b/apps/ocr/urls.py
@@ -1,16 +1,22 @@
 from django.conf.urls.defaults import patterns, url

 urlpatterns = patterns('ocr.views',
-    url(r'^(?P<document_id>\d+)/submit/$', 'submit_document', (), 'submit_document'),
-    url(r'^ocr/queue/document/list/$', 'queue_document_list', (), 'queue_document_list'),
-    url(r'^ocr/queue/document/(?P<queue_document_id>\d+)/delete/$', 'queue_document_delete', (), 'queue_document_delete'),
-    url(r'^ocr/queue/document/multiple/delete/$', 'queue_document_multiple_delete', (), 'queue_document_multiple_delete'),
-    url(r'^ocr/queue/document/(?P<queue_document_id>\d+)/re-queue/$', 're_queue_document', (), 're_queue_document'),
-    url(r'^ocr/queue/document/multiple/re-queue/$', 're_queue_multiple_document', (), 're_queue_multiple_document'),
+    url(r'^document/(?P<document_id>\d+)/submit/$', 'submit_document', (), 'submit_document'),
+    url(r'^queue/document/list/$', 'queue_document_list', (), 'queue_document_list'),
+    url(r'^queue/document/(?P<queue_document_id>\d+)/delete/$', 'queue_document_delete', (), 'queue_document_delete'),
+    url(r'^queue/document/multiple/delete/$', 'queue_document_multiple_delete', (), 'queue_document_multiple_delete'),
+    url(r'^queue/document/(?P<queue_document_id>\d+)/re-queue/$', 're_queue_document', (), 're_queue_document'),
+    url(r'^queue/document/multiple/re-queue/$', 're_queue_multiple_document', (), 're_queue_multiple_document'),

-    url(r'^ocr/queue/(?P<document_queue_id>\d+)/enable/$', 'document_queue_enable', (), 'document_queue_enable'),
-    url(r'^ocr/queue/(?P<document_queue_id>\d+)/disable/$', 'document_queue_disable', (), 'document_queue_disable'),
+    url(r'^queue/(?P<document_queue_id>\d+)/enable/$', 'document_queue_enable', (), 'document_queue_enable'),
+    url(r'^queue/(?P<document_queue_id>\d+)/disable/$', 'document_queue_disable', (), 'document_queue_disable'),

-    url(r'^ocr/document/all/clean_up/$', 'all_document_ocr_cleanup', (), 'all_document_ocr_cleanup'),
-    url(r'^ocr/node/active/list/$', 'node_active_list', (), 'node_active_list'),
+    url(r'^document/all/clean_up/$', 'all_document_ocr_cleanup', (), 'all_document_ocr_cleanup'),
+    url(r'^node/active/list/$', 'node_active_list', (), 'node_active_list'),
+
+    # Queue transformation setup views. The views look these ids up as
+    # primary keys, so only digits are accepted, same as every other id
+    # pattern in this file
+    url(r'^queue/(?P<document_queue_id>\d+)/transformation/list/$', 'setup_queue_transformation_list', (), 'setup_queue_transformation_list'),
+    url(r'^queue/(?P<document_queue_id>\d+)/transformation/create/$', 'setup_queue_transformation_create', (), 'setup_queue_transformation_create'),
+    url(r'^queue/transformation/(?P<transformation_id>\d+)/edit/$', 'setup_queue_transformation_edit', (), 'setup_queue_transformation_edit'),
+    url(r'^queue/transformation/(?P<transformation_id>\d+)/delete/$', 'setup_queue_transformation_delete', (), 'setup_queue_transformation_delete'),
+
 )
--- a/apps/ocr/views.py
+++ b/apps/ocr/views.py
@@ -6,9 +6,8 @@ from django.shortcuts import render_to_response, get_object_or_404
 from django.template import RequestContext
 from django.contrib import messages
 from django.views.generic.list_detail import object_list
-from django.core.urlresolvers import reverse
 from django.utils.translation import ugettext_lazy as _
-from django.conf import settings
+from django.core.urlresolvers import reverse

 from celery.task.control import inspect
 from permissions.api import check_permissions
@@ -18,12 +17,13 @@ from documents.widgets import document_link, document_thumbnail
 from ocr import PERMISSION_OCR_DOCUMENT, PERMISSION_OCR_DOCUMENT_DELETE, \
    PERMISSION_OCR_QUEUE_ENABLE_DISABLE, PERMISSION_OCR_CLEAN_ALL_PAGES

-from ocr.models import DocumentQueue, QueueDocument
+from ocr.models import DocumentQueue, QueueDocument, QueueTransformation
 from ocr.literals import QUEUEDOCUMENT_STATE_PENDING, \
    QUEUEDOCUMENT_STATE_PROCESSING, DOCUMENTQUEUE_STATE_STOPPED, \
    DOCUMENTQUEUE_STATE_ACTIVE
 from ocr.exceptions import AlreadyQueued
 from ocr.api import clean_pages
+from ocr.forms import QueueTransformationForm, QueueTransformationForm_create


 def queue_document_list(request, queue_name='default'):
@@ -38,8 +38,10 @@ def queue_document_list(request, queue_name='default'):
        extra_context={
            'title': _(u'documents in queue: %s') % document_queue,
            'hide_object': True,
-            'object': document_queue,
+            'queue': document_queue,
            'object_name': _(u'document queue'),
+            'navigation_object_name': 'queue',
+            'list_object_variable_name': 'queue_document',               
            'extra_columns': [
                {'name': 'document', 'attribute': lambda x: document_link(x.document) if hasattr(x, 'document') else _(u'Missing document.')},
                {'name': _(u'thumbnail'), 'attribute': lambda x: document_thumbnail(x.document)},
@@ -212,7 +214,8 @@ def document_queue_disable(request, document_queue_id):
        return HttpResponseRedirect(next)

    return render_to_response('generic_confirm.html', {
-        'object': document_queue,
+        'queue': document_queue,
+        'navigation_object_name': 'queue',
        'title': _(u'Are you sure you wish to disable document queue: %s') % document_queue,
        'next': next,
        'previous': previous,
@@ -238,7 +241,8 @@ def document_queue_enable(request, document_queue_id):
        return HttpResponseRedirect(next)

    return render_to_response('generic_confirm.html', {
-        'object': document_queue,
+        'queue': document_queue,
+        'navigation_object_name': 'queue',
        'title': _(u'Are you sure you wish to activate document queue: %s') % document_queue,
        'next': next,
        'previous': previous,
@@ -317,3 +321,141 @@ def node_active_list(request):
            {'name': _(u'related object'), 'attribute': lambda x: display_link(x['related_object']) if x['related_object'] else u''}
        ],
    }, context_instance=RequestContext(request))
+
+
+def setup_queue_transformation_list(request, document_queue_id):
+    """
+    Render the list of transformations attached to a document queue;
+    an unknown document_queue_id results in a 404
+    """
+    # NOTE(review): this view runs without any permission check, the
+    # call below is still disabled
+    #check_permissions(request.user, [PERMISSION_SOURCES_SETUP_EDIT])
+
+    document_queue = get_object_or_404(DocumentQueue, pk=document_queue_id)
+
+    extra_columns = [
+        {'name': _(u'order'), 'attribute': 'order'},
+        {'name': _(u'transformation'), 'attribute': lambda x: x.get_transformation_display()},
+        {'name': _(u'arguments'), 'attribute': 'arguments'}
+    ]
+
+    return render_to_response('generic_list.html', {
+        'object_list': QueueTransformation.objects.get_for_object(document_queue),
+        'title': _(u'transformations for: %s') % document_queue,
+        'queue': document_queue,
+        'object_name': _(u'document queue'),
+        'navigation_object_name': 'queue',
+        'list_object_variable_name': 'transformation',
+        'extra_columns': extra_columns,
+        'hide_link': True,
+        'hide_object': True,
+    }, context_instance=RequestContext(request))
+
+
+def setup_queue_transformation_edit(request, transformation_id):
+    """
+    Edit an existing queue transformation
+
+    A valid POST saves the transformation and redirects to the 'next'
+    URL (request parameter, then the referer, then the transformation
+    list of the queue); any other request, or a rejected submission,
+    renders the edit form. An unknown transformation_id results in a 404
+    """
+    # NOTE(review): this view runs without any permission check, the
+    # call below is still disabled
+    #check_permissions(request.user, [PERMISSION_SOURCES_SETUP_EDIT])
+
+    transformation = get_object_or_404(QueueTransformation, pk=transformation_id)
+    redirect_view = reverse('setup_queue_transformation_list', args=[transformation.content_object.pk])
+    next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', redirect_view)))
+
+    if request.method == 'POST':
+        form = QueueTransformationForm(instance=transformation, data=request.POST)
+        if form.is_valid():
+            try:
+                # Test the validity of the argument field
+                # NOTE(review): eval() executes an expression taken
+                # straight from the request; the empty globals dict does
+                # not remove access to the builtins. Consider a literal
+                # only parser such as ast.literal_eval
+                eval(form.cleaned_data['arguments'], {})
+            except Exception:
+                # Not a bare except, so that SystemExit and
+                # KeyboardInterrupt keep propagating
+                messages.error(request, _(u'Queue transformation argument error.'))
+            else:
+                try:
+                    form.save()
+                    messages.success(request, _(u'Queue transformation edited successfully'))
+                    return HttpResponseRedirect(next)
+                except Exception, e:
+                    messages.error(request, _(u'Error editing queue transformation; %s') % e)
+    else:
+        form = QueueTransformationForm(instance=transformation)
+
+    return render_to_response('generic_form.html', {
+        'title': _(u'Edit transformation: %s') % transformation,
+        'form': form,
+        'queue': transformation.content_object,
+        'transformation': transformation,
+        'navigation_object_list': [
+            {'object': 'queue', 'name': _(u'document queue')},
+            {'object': 'transformation', 'name': _(u'transformation')}
+        ],
+        'next': next,
+    },
+    context_instance=RequestContext(request))
+
+
+def setup_queue_transformation_delete(request, transformation_id):
+    """
+    Delete a queue transformation after confirmation
+
+    Non POST requests render the confirmation page; a POST attempts the
+    deletion, reports the outcome through the messages framework and
+    redirects to the transformation list of the queue
+    """
+    # NOTE(review): this view runs without any permission check, the
+    # call below is still disabled
+    #check_permissions(request.user, [PERMISSION_SOURCES_SETUP_EDIT])
+
+    transformation = get_object_or_404(QueueTransformation, pk=transformation_id)
+    queue = transformation.content_object
+    redirect_view = reverse('setup_queue_transformation_list', args=[queue.pk])
+    previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', redirect_view)))
+
+    if request.method != 'POST':
+        return render_to_response('generic_confirm.html', {
+            'delete_view': True,
+            'transformation': transformation,
+            'queue': queue,
+            'navigation_object_list': [
+                {'object': 'queue', 'name': _(u'document queue')},
+                {'object': 'transformation', 'name': _(u'transformation')}
+            ],
+            'title': _(u'Are you sure you wish to delete queue transformation "%(transformation)s"') % {
+                'transformation': transformation.get_transformation_display(),
+            },
+            'previous': previous,
+            'form_icon': u'shape_square_delete.png',
+        }, context_instance=RequestContext(request))
+
+    try:
+        transformation.delete()
+        messages.success(request, _(u'Queue transformation deleted successfully.'))
+    except Exception, e:
+        messages.error(request, _(u'Error deleting queue transformation; %(error)s') % {
+            'error': e}
+        )
+
+    # The redirect happens whether or not the deletion succeeded
+    return HttpResponseRedirect(redirect_view)
+
+
+def setup_queue_transformation_create(request, document_queue_id):
+    """
+    Create a new transformation for a document queue
+
+    A valid POST stores the transformation linked to the queue and
+    redirects to the transformation list of that queue; any other
+    request, or a rejected submission, renders the creation form. An
+    unknown document_queue_id results in a 404
+    """
+    # NOTE(review): this view runs without any permission check, the
+    # call below is still disabled
+    #check_permissions(request.user, [PERMISSION_SOURCES_SETUP_EDIT])
+
+    document_queue = get_object_or_404(DocumentQueue, pk=document_queue_id)
+    redirect_view = reverse('setup_queue_transformation_list', args=[document_queue.pk])
+
+    if request.method == 'POST':
+        form = QueueTransformationForm_create(request.POST)
+        if form.is_valid():
+            try:
+                # Test the validity of the argument field
+                # NOTE(review): eval() executes an expression taken
+                # straight from the request; the empty globals dict does
+                # not remove access to the builtins. Consider a literal
+                # only parser such as ast.literal_eval
+                eval(form.cleaned_data['arguments'], {})
+            except Exception:
+                # Not a bare except, so that SystemExit and
+                # KeyboardInterrupt keep propagating
+                messages.error(request, _(u'Queue transformation argument error.'))
+            else:
+                try:
+                    # The form excludes the generic relation fields, so
+                    # the queue is attached before the first save
+                    queue_transformation = form.save(commit=False)
+                    queue_transformation.content_object = document_queue
+                    queue_transformation.save()
+                    messages.success(request, _(u'Queue transformation created successfully'))
+                    return HttpResponseRedirect(redirect_view)
+                except Exception, e:
+                    messages.error(request, _(u'Error creating queue transformation; %s') % e)
+    else:
+        form = QueueTransformationForm_create()
+
+    return render_to_response('generic_form.html', {
+        'form': form,
+        'queue': document_queue,
+        'object_name': _(u'document queue'),
+        'navigation_object_name': 'queue',
+        'title': _(u'Create new transformation for queue: %s') % document_queue,
+    }, context_instance=RequestContext(request))
+
--- a/apps/sources/managers.py
+++ b/apps/sources/managers.py
@@ -6,3 +6,19 @@ class SourceTransformationManager(models.Manager):
    def get_for_object(self, obj):
        ct = ContentType.objects.get_for_model(obj)
        return self.model.objects.filter(content_type=ct).filter(object_id=obj.pk)
+
+    def get_for_object_as_list(self, obj):
+        """
+        Return a (transformations, warnings) tuple for obj
+
+        transformations is a list of dictionaries with the keys
+        'transformation' and 'arguments' (the evaluated arguments text);
+        warnings holds the exceptions raised by the entries whose
+        arguments could not be evaluated, those entries are left out
+        """
+        parsed = []
+        failures = []
+        rows = self.get_for_object(obj).values('transformation', 'arguments')
+        for row in rows:
+            try:
+                # NOTE(review): eval() executes whatever expression is
+                # stored in the arguments field; passing an empty globals
+                # dict does not remove access to the builtins
+                evaluated = eval(row['arguments'], {})
+            except Exception, e:
+                failures.append(e)
+            else:
+                parsed.append({
+                    'transformation': row['transformation'],
+                    'arguments': evaluated
+                })
+
+        return parsed, failures
--- a/apps/sources/models.py
+++ b/apps/sources/models.py
@@ -4,14 +4,13 @@ from django.contrib.contenttypes.models import ContentType
 from django.contrib.contenttypes import generic

 from documents.models import DocumentType
-from documents.conf.settings import AVAILABLE_TRANSFORMATIONS
 from documents.managers import RecentDocumentManager
 from metadata.models import MetadataType
+from converter.api import get_available_transformations_choices
+from converter.literals import DIMENSION_SEPARATOR    

 from sources.managers import SourceTransformationManager

-available_transformations = ([(name, data['label']) for name, data in AVAILABLE_TRANSFORMATIONS.items()])
-
 SOURCE_UNCOMPRESS_CHOICE_Y = 'y'
 SOURCE_UNCOMPRESS_CHOICE_N = 'n'
 SOURCE_UNCOMPRESS_CHOICE_ASK = 'a'
@@ -120,7 +119,7 @@ class StagingFolder(InteractiveBaseModel):
        if self.preview_height:
            dimensions.append(unicode(self.preview_height))

-        return u'x'.join(dimensions)
+        return DIMENSION_SEPARATOR.join(dimensions)

    class Meta(InteractiveBaseModel.Meta):
        verbose_name = _(u'staging folder')
@@ -164,8 +163,8 @@ class SourceTransformation(models.Model):
    object_id = models.PositiveIntegerField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')
    order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_(u'order'), db_index=True)
-    transformation = models.CharField(choices=available_transformations, max_length=128, verbose_name=_(u'transformation'))
-    arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: {\'degrees\':90}'))
+    transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_(u'transformation'))
+    arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: %s') % u'{\'degrees\':90}')

    objects = SourceTransformationManager()

--- a/apps/sources/staging.py
+++ b/apps/sources/staging.py
@@ -8,11 +8,9 @@ from django.utils.translation import ugettext
 from django.contrib import messages
 from django.utils.translation import ugettext_lazy as _

-from converter import TRANFORMATION_CHOICES
 from converter.api import convert, cache_cleanup

 DEFAULT_STAGING_DIRECTORY = u'/tmp'
-#from documents.conf.settings import DEFAULT_TRANSFORMATIONS

 HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest()
 #TODO: Do benchmarks
@@ -107,16 +105,15 @@ class StagingFile(object):
    def upload(self):
        """
        Return a StagingFile encapsulated in a File class instance to
-        allow for easier upload a staging files
+        allow for easier upload of staging files
        """
        try:
            return File(file(self.filepath, 'rb'), name=self.filename)
        except Exception, exc:
            raise Exception(ugettext(u'Unable to upload staging file: %s') % exc)

-    def delete(self, preview_size):
-        # tranformation_string, errors = get_transformation_string(DEFAULT_TRANSFORMATIONS)
-        cache_cleanup(self.filepath, size=preview_size)# , extra_options=tranformation_string)
+    def delete(self, preview_size, transformations):
+        cache_cleanup(self.filepath, size=preview_size, transformations=transformations)
        try:
            os.unlink(self.filepath)
        except OSError, exc:
@@ -125,24 +122,7 @@ class StagingFile(object):
            else:
                raise OSError(ugettext(u'Unable to delete staging file: %s') % exc)

-    def preview(self, preview_size):
+    def preview(self, preview_size, transformations):
        errors = []
-        # tranformation_string, errors = get_transformation_string(DEFAULT_TRANSFORMATIONS)
-        # output_file = convert(self.filepath, size=STAGING_FILES_PREVIEW_SIZE, extra_options=tranformation_string, cleanup_files=False)
-        output_file = convert(self.filepath, size=preview_size, cleanup_files=False)
+        output_file = convert(self.filepath, size=preview_size, cleanup_files=False, transformations=transformations)
        return output_file, errors
-
-
-def get_transformation_string(transformations):
-    transformation_list = []
-    errors = []
-    for transformation in transformations:
-        try:
-            if transformation['name'] in TRANFORMATION_CHOICES:
-                output = TRANFORMATION_CHOICES[transformation['name']] % eval(transformation['arguments'])
-                transformation_list.append(output)
-        except Exception, e:
-            errors.append(e)
-
-    tranformation_string = ' '.join(transformation_list)
-    return tranformation_string, errors
--- a/apps/sources/urls.py
+++ b/apps/sources/urls.py
@@ -16,12 +16,12 @@ urlpatterns = patterns('sources.views',
    url(r'^setup/interactive/staging_folder/list/$', 'setup_source_list', {'source_type': SOURCE_CHOICE_STAGING}, 'setup_staging_folder_list'),

    url(r'^setup/interactive/(?P<source_type>\w+)/list/$', 'setup_source_list', (), 'setup_source_list'),
-    url(r'^setup/interactive/(?P<source_type>\w+)/(?P<source_id>\w+)/edit/$', 'setup_source_edit', (), 'setup_source_edit'),
-    url(r'^setup/interactive/(?P<source_type>\w+)/(?P<source_id>\w+)/delete/$', 'setup_source_delete', (), 'setup_source_delete'),
+    url(r'^setup/interactive/(?P<source_type>\w+)/(?P<source_id>\d+)/edit/$', 'setup_source_edit', (), 'setup_source_edit'),
+    url(r'^setup/interactive/(?P<source_type>\w+)/(?P<source_id>\d+)/delete/$', 'setup_source_delete', (), 'setup_source_delete'),
    url(r'^setup/interactive/(?P<source_type>\w+)/create/$', 'setup_source_create', (), 'setup_source_create'),

-    url(r'^setup/interactive/(?P<source_type>\w+)/(?P<source_id>\w+)/transformation/list/$', 'setup_source_transformation_list', (), 'setup_source_transformation_list'),
-    url(r'^setup/interactive/(?P<source_type>\w+)/(?P<source_id>\w+)/transformation/create/$', 'setup_source_transformation_create', (), 'setup_source_transformation_create'),
-    url(r'^setup/interactive/source/transformation/(?P<transformation_id>\w+)/edit/$', 'setup_source_transformation_edit', (), 'setup_source_transformation_edit'),
-    url(r'^setup/interactive/source/transformation/(?P<transformation_id>\w+)/delete/$', 'setup_source_transformation_delete', (), 'setup_source_transformation_delete'),
+    url(r'^setup/interactive/(?P<source_type>\w+)/(?P<source_id>\d+)/transformation/list/$', 'setup_source_transformation_list', (), 'setup_source_transformation_list'),
+    url(r'^setup/interactive/(?P<source_type>\w+)/(?P<source_id>\d+)/transformation/create/$', 'setup_source_transformation_create', (), 'setup_source_transformation_create'),
+    url(r'^setup/interactive/source/transformation/(?P<transformation_id>\d+)/edit/$', 'setup_source_transformation_edit', (), 'setup_source_transformation_edit'),
+    url(r'^setup/interactive/source/transformation/(?P<transformation_id>\d+)/delete/$', 'setup_source_transformation_delete', (), 'setup_source_transformation_delete'),
 )
--- a/apps/sources/views.py
+++ b/apps/sources/views.py
@@ -129,9 +129,13 @@ def upload_interactive(request, source_type=None, source_id=None):
                                expand = True
                            else:
                                expand = False
-                        if (not expand) or (expand and not _handle_zip_file(request, request.FILES['file'], document_type)):
+
+                        transformations, errors = SourceTransformation.objects.get_for_object_as_list(web_form)
+                               
+                        if (not expand) or (expand and not _handle_zip_file(request, request.FILES['file'], document_type=document_type, transformations=transformations)):
                            instance = form.save()
                            instance.save()
+                            instance.apply_default_transformations(transformations)
                            if document_type:
                                instance.document_type = document_type
                            _handle_save_document(request, instance, form)
@@ -174,16 +178,18 @@ def upload_interactive(request, source_type=None, source_id=None):
                                expand = True
                            else:
                                expand = False                        
-                        if (not expand) or (expand and not _handle_zip_file(request, staging_file.upload(), document_type)):
+                        transformations, errors = SourceTransformation.objects.get_for_object_as_list(staging_folder)
+                        if (not expand) or (expand and not _handle_zip_file(request, staging_file.upload(), document_type=document_type, transformations=transformations)):
                            document = Document(file=staging_file.upload())
                            if document_type:
                                document.document_type = document_type
                            document.save()
+                            document.apply_default_transformations(transformations)
                            _handle_save_document(request, document, form)
                            messages.success(request, _(u'Staging file: %s, uploaded successfully.') % staging_file.filename)

                        if staging_folder.delete_after_upload:
-                            staging_file.delete(staging_folder.get_preview_size())
+                            staging_file.delete(preview_size=staging_folder.get_preview_size(), transformations=transformations)
                            messages.success(request, _(u'Staging file: %s, deleted successfully.') % staging_file.filename)
                    except Exception, e:
                        messages.error(request, e)
@@ -260,7 +266,7 @@ def _handle_save_document(request, document, form=None):
    create_history(HISTORY_DOCUMENT_CREATED, document, {'user': request.user})


-def _handle_zip_file(request, uploaded_file, document_type=None):
+def _handle_zip_file(request, uploaded_file, document_type=None, transformations=None):
    filename = getattr(uploaded_file, 'filename', getattr(uploaded_file, 'name', ''))
    if filename.lower().endswith('zip'):
        zfobj = zipfile.ZipFile(uploaded_file)
@@ -285,7 +291,12 @@ def staging_file_preview(request, source_type, source_id, staging_file_id):
    staging_folder = get_object_or_404(StagingFolder, pk=source_id)
    StagingFile = create_staging_file_class(request, staging_folder.folder_path)
    try:
-        output_file, errors = StagingFile.get(staging_file_id).preview(staging_folder.get_preview_size())
+        transformations, errors=SourceTransformation.objects.get_for_object_as_list(staging_folder)
+        
+        output_file, errors = StagingFile.get(staging_file_id).preview(
+            preview_size=staging_folder.get_preview_size(),
+            transformations=transformations
+        )
        if errors and (request.user.is_staff or request.user.is_superuser):
            for error in errors:
                messages.warning(request, _(u'Staging file transformation error: %(error)s') % {
@@ -313,15 +324,19 @@ def staging_file_delete(request, source_type, source_id, staging_file_id):
    StagingFile = create_staging_file_class(request, staging_folder.folder_path)    

    staging_file = StagingFile.get(staging_file_id)
-    next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None)))
-    previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None)))
+    next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', '/')))
+    previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', '/')))

    if request.method == 'POST':
        try:
-            staging_file.delete(staging_folder.get_preview_size())
+            transformations, errors=SourceTransformation.objects.get_for_object_as_list(staging_folder)
+            staging_file.delete(
+                preview_size=staging_folder.get_preview_size(),
+                transformations=transformations
+            )
            messages.success(request, _(u'Staging file delete successfully.'))
        except Exception, e:
-            messages.error(request, e)
+            messages.error(request, _(u'Staging file delete error; %s.') % e)
        return HttpResponseRedirect(next)

    results = get_active_tab_links()
@@ -509,11 +524,17 @@ def setup_source_transformation_edit(request, transformation_id):
        form = SourceTransformationForm(instance=source_transformation, data=request.POST)
        if form.is_valid():
            try:
-                form.save()
-                messages.success(request, _(u'Source transformation edited successfully'))
-                return HttpResponseRedirect(next)
-            except Exception, e:
-                messages.error(request, _(u'Error editing source transformation; %s') % e)
+                # Test the validity of the argument field
+                eval(form.cleaned_data['arguments'], {})
+            except:
+                messages.error(request, _(u'Source transformation argument error.'))
+            else:
+                try:
+                    form.save()
+                    messages.success(request, _(u'Source transformation edited successfully'))
+                    return HttpResponseRedirect(next)
+                except Exception, e:
+                    messages.error(request, _(u'Error editing source transformation; %s') % e)
    else:
        form = SourceTransformationForm(instance=source_transformation)

@@ -541,9 +562,9 @@ def setup_source_transformation_delete(request, transformation_id):
    if request.method == 'POST':
        try:
            source_transformation.delete()
-            messages.success(request, _(u'Transformation deleted successfully.'))
+            messages.success(request, _(u'Source transformation deleted successfully.'))
        except Exception, e:
-            messages.error(request, _(u'Error deleting transformation; %(error)s') % {
+            messages.error(request, _(u'Error deleting source transformation; %(error)s') % {
                'error': e}
            )
        return HttpResponseRedirect(redirect_view)
@@ -556,7 +577,7 @@ def setup_source_transformation_delete(request, transformation_id):
            {'object': 'source', 'name': _(u'source')},
            {'object': 'transformation', 'name': _(u'transformation')}
        ],            
-        'title': _(u'Are you sure you wish to delete transformation "%(transformation)s"') % {
+        'title': _(u'Are you sure you wish to delete source transformation "%(transformation)s"') % {
            'transformation': source_transformation.get_transformation_display(),
        },
        'previous': previous,
@@ -598,13 +619,19 @@ def setup_source_transformation_create(request, source_type, source_id):
        form = SourceTransformationForm_create(request.POST)
        if form.is_valid():
            try:
-                source_tranformation = form.save(commit=False)
-                source_tranformation.content_object = source
-                source_tranformation.save()
-                messages.success(request, _(u'Source transformation created successfully'))
-                return HttpResponseRedirect(redirect_view)
-            except Exception, e:
-                messages.error(request, _(u'Error creating source transformation; %s') % e)
+                # Test the validity of the argument field
+                eval(form.cleaned_data['arguments'], {})
+            except:
+                messages.error(request, _(u'Source transformation argument error.'))
+            else:            
+                try:
+                    source_tranformation = form.save(commit=False)
+                    source_tranformation.content_object = source
+                    source_tranformation.save()
+                    messages.success(request, _(u'Source transformation created successfully'))
+                    return HttpResponseRedirect(redirect_view)
+                except Exception, e:
+                    messages.error(request, _(u'Error creating source transformation; %s') % e)
    else:
        form = SourceTransformationForm_create()
        
--- a/requirements/development.txt
+++ b/requirements/development.txt
@@ -9,3 +9,5 @@ django-celery==2.2.2
 django-sentry==1.6.0
 django-taggit==0.9.3
 -e git://github.com/django-mptt/django-mptt.git@0af02a95877041b2fd6d458bd95413dc1666c321#egg=django-mptt
+slate==0.3
+PIL==1.1.7
--- a/requirements/production.txt
+++ b/requirements/production.txt
@@ -6,3 +6,5 @@ django-celery==2.2.2
 django-sentry==1.6.0
 django-taggit==0.9.3
 -e git://github.com/django-mptt/django-mptt.git@0af02a95877041b2fd6d458bd95413dc1666c321#egg=django-mptt
+slate==0.3
+PIL==1.1.7