Merge branch 'master_merge_test'

This commit is contained in:
Roberto Rosario
2011-07-21 03:56:31 -04:00
66 changed files with 2596 additions and 998 deletions

View File

@@ -5,7 +5,7 @@ Open source, Django based document manager with custom metadata indexing, file s
[Website](http://bit.ly/mayan-edms)
Requirements
Basic requirements
---
Python:
@@ -15,6 +15,21 @@ Python:
* django-filetransfers - File upload/download abstraction
* celery - asynchronous task queue/job queue based on distributed message passing
* django-celery - celery Django integration
* django-mptt - Utilities for implementing a modified pre-order traversal tree in django
* python-magic - A python wrapper for libmagic
* django-taggit - Simple tagging for django
* slate - The simplest way to extract text from PDFs in Python
Execute pip install -r requirements/production.txt to install the python/django dependencies automatically.
Executables:
* tesseract-ocr - An OCR Engine that was developed at HP Labs between 1985 and 1995... and now at Google.
* unpaper - post-processing scanned and photocopied book pages
Optional requirements
---
For the GridFS storage backend:
@@ -22,13 +37,12 @@ For the GridFS storage backend:
* GridFS - a storage specification for large objects in MongoDB
* MongoDB - a scalable, open source, document-oriented database
Or execute pip install -r requirements/production.txt to install the dependencies automatically.
Libraries:
Executables:
* libmagic - MIME detection library; if it is not installed, Mayan will fall back to Python's simpler built-in mimetypes library
Mayan has the ability to switch between different image conversion backends, at the moment these two are supported:
* libmagic - MIME detection library
* tesseract-ocr - An OCR Engine that was developed at HP Labs between 1985 and 1995... and now at Google.
* unpaper - post-processing scanned and photocopied book pages
* ImageMagick - Convert, Edit, Or Compose Bitmap Images
* GraphicsMagick - Robust collection of tools and libraries to read, write, and manipulate an image.

View File

@@ -8,9 +8,7 @@ from django.db.models import signals
from navigation.api import register_links
from common.conf import settings as common_settings
TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY \
if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
from common.utils import validate_path
def has_usable_password(context):
@@ -22,7 +20,6 @@ current_user_edit = {'text': _(u'edit details'), 'view': 'current_user_edit', 'f
register_links(['current_user_details', 'current_user_edit', 'password_change_view'], [current_user_details, current_user_edit, password_change_view], menu_name='secondary_menu')
if common_settings.AUTO_CREATE_ADMIN:
# From https://github.com/lambdalisue/django-qwert/blob/master/qwert/autoscript/__init__.py
# From http://stackoverflow.com/questions/1466827/ --
@@ -50,3 +47,6 @@ if common_settings.AUTO_CREATE_ADMIN:
dispatch_uid='django.contrib.auth.management.create_superuser')
signals.post_syncdb.connect(create_testuser,
sender=auth_models, dispatch_uid='common.models.create_testuser')
# Fall back to a freshly created temporary directory when no directory is
# configured or when the configured one fails validate_path().  The
# emptiness test runs first so that validate_path() is never handed an
# unset (None / empty) value.
if (not common_settings.TEMPORARY_DIRECTORY) or (not validate_path(common_settings.TEMPORARY_DIRECTORY)):
    setattr(common_settings, 'TEMPORARY_DIRECTORY', tempfile.mkdtemp())

View File

@@ -3,6 +3,7 @@
{% load pagination_tags %}
{% load navigation_tags %}
{% load non_breakable %}
{% load variable_tags %}
{% if side_bar %}
<div class="block">
@@ -122,13 +123,17 @@
{% endif %}
{% endfor %}
{% if not hide_links %}
{% if list_object_variable_name %}
{% copy_variable object as list_object_variable_name %}
{% copy_variable list_object_variable_name as "navigation_object_name" %}
{% endif %}
<td class="last">
{% if navigation_object_links %}
{% with navigation_object_links as overrided_object_links %}
{% object_navigation_template %}
{% endwith %}
{% else %}
{% object_navigation_template %}
{% object_navigation_template %}
{% endif %}
</td>
{% endif %}

View File

@@ -0,0 +1,42 @@
import re
from django.template import Node, TemplateSyntaxError, Library, Variable
register = Library()
class CopyNode(Node):
    """
    Template node that copies the value of one context variable into
    another context entry, optionally blanking the source afterwards.
    """
    def __init__(self, source_variable, var_name, delete_old=False):
        # source_variable: template expression whose resolved value is copied
        # var_name: template expression that resolves to the destination key
        # delete_old: when true the source entry is blanked after the copy
        self.source_variable = source_variable
        self.var_name = var_name
        self.delete_old = delete_old

    def render(self, context):
        """
        Perform the copy inside ``context`` and render as an empty string.
        """
        value = Variable(self.source_variable).resolve(context)
        destination = Variable(self.var_name).resolve(context)
        context[destination] = value

        if self.delete_old:
            # Blank the entry stored under the source variable's *name*;
            # keying on its resolved value (as was done before) never
            # touched the source and failed for unhashable values
            context[self.source_variable] = u''

        return ''
@register.tag
def copy_variable(parser, token):
    """
    Template tag of the form ``{% copy_variable <source> as <destination> %}``.

    Argument parsing and node construction are delegated to parse_tag()
    with no extra options.
    """
    node = parse_tag(parser, token)
    return node
@register.tag
def rename_variable(parser, token):
    """
    Template tag of the form ``{% rename_variable <source> as <destination> %}``.

    Works like copy_variable but asks the resulting node to blank the
    source once the value has been copied.
    """
    # delete_old has to travel as a keyword argument: a positional dict
    # would itself be stored as the node's delete_old flag
    return parse_tag(parser, token, delete_old=True)
def parse_tag(parser, token, *args, **kwargs):
    """
    Parse the contents of a ``<tag> <source> as <destination>`` template
    tag and return the corresponding CopyNode.

    Extra positional and keyword arguments are forwarded unchanged to the
    CopyNode constructor.  TemplateSyntaxError is raised when the tag has
    no arguments or when they do not match the ``<source> as
    <destination>`` form.
    """
    # split(None, 1) cuts at the first run of whitespace: tag name first,
    # everything else second
    pieces = token.contents.split(None, 1)
    if len(pieces) != 2:
        raise TemplateSyntaxError('%r tag requires arguments' % token.contents.split()[0])
    tag_name, arg = pieces

    # The destination may optionally be wrapped in quotes
    match = re.search(r'(.*?) as ([\'"]*\w+[\'"]*)', arg)
    if match is None:
        raise TemplateSyntaxError('%r tag had invalid arguments' % tag_name)

    source_variable, var_name = match.groups()
    return CopyNode(source_variable, var_name, *args, **kwargs)

View File

@@ -2,6 +2,7 @@
import os
import re
import types
import tempfile
from django.utils.http import urlquote as django_urlquote
from django.utils.http import urlencode as django_urlencode
@@ -12,6 +13,15 @@ from django.contrib.contenttypes.models import ContentType
from django.contrib.auth.models import User
try:
from python_magic import magic
USE_PYTHON_MAGIC = True
except:
import mimetypes
mimetypes.init()
USE_PYTHON_MAGIC = False
def urlquote(link=None, get=None):
u'''
This method does both: urlquote() and urlencode()
@@ -337,3 +347,50 @@ def return_diff(old_obj, new_obj, attrib_list=None):
}
return diff_dict
def get_mimetype(filepath):
    """
    Determine a file's mimetype by calling the system's libmagic
    library via python-magic or fallback to use python's mimetypes
    library

    Returns a (mimetype, encoding) pair.  When python-magic is in use and
    filepath does not exist, both members are empty unicode strings.  In
    the fallback case the pair is whatever mimetypes.guess_type() returns
    for the file name, which can contain None.
    """
    file_mimetype = u''
    file_mime_encoding = u''

    if USE_PYTHON_MAGIC:
        if os.path.exists(filepath):
            # Read the content once, in binary mode, and hand the same
            # buffer to both detectors; the with block closes the file
            # even when reading fails
            with open(filepath, 'rb') as source:
                contents = source.read()

            file_mimetype = magic.Magic(mime=True).from_buffer(contents)
            file_mime_encoding = magic.Magic(mime_encoding=True).from_buffer(contents)
    else:
        # Only the file name is used by the mimetypes module
        path, filename = os.path.split(filepath)
        file_mimetype, file_mime_encoding = mimetypes.guess_type(filename)

    return file_mimetype, file_mime_encoding
def validate_path(path):
    """
    Check that path is a usable directory, creating it when it is missing.

    Returns True when the directory exists (or could be created with
    os.mkdir) and a temporary file can be created and removed inside it.
    Returns False otherwise, including when path is empty or None.
    """
    if not path:
        # Nothing to validate; os.path.exists() raises TypeError for None
        return False

    if not os.path.exists(path):
        # If doesn't exist try to create it
        try:
            os.mkdir(path)
        except Exception:
            # Best effort: any failure just means the path is unusable
            return False

    # Check if it is writable
    try:
        fd, test_filepath = tempfile.mkstemp(dir=path)
        os.close(fd)
        os.unlink(test_filepath)
    except Exception:
        return False

    return True

View File

@@ -1,11 +1,16 @@
from django.utils.translation import ugettext_lazy as _
from django.core.exceptions import ImproperlyConfigured
from navigation.api import register_sidebar_template
TRANFORMATION_CHOICES = {
u'rotate': u'-rotate %(degrees)d'
}
from converter.utils import load_backend
from converter.conf.settings import GRAPHICS_BACKEND
formats_list = {'text': _('file formats'), 'view': 'formats_list', 'famfam': 'pictures'}
register_sidebar_template(['formats_list'], 'converter_file_formats_help.html')
try:
backend = load_backend().ConverterClass()
except ImproperlyConfigured:
raise ImproperlyConfigured(u'Missing or incorrect converter backend: %s' % GRAPHICS_BACKEND)

View File

@@ -1,91 +1,29 @@
import os
import subprocess
import hashlib
from django.utils.importlib import import_module
from django.template.defaultfilters import slugify
from common.conf.settings import TEMPORARY_DIRECTORY
from converter.conf.settings import UNPAPER_PATH
from converter.conf.settings import OCR_OPTIONS
from converter.conf.settings import DEFAULT_OPTIONS
from converter.conf.settings import LOW_QUALITY_OPTIONS
from converter.conf.settings import HIGH_QUALITY_OPTIONS
from converter.conf.settings import PRINT_QUALITY_OPTIONS
from converter.conf.settings import GRAPHICS_BACKEND
from converter.conf.settings import UNOCONV_PATH
from converter.exceptions import OfficeConversionError
from converter.literals import DEFAULT_PAGE_NUMBER, \
DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, DEFAULT_FILE_FORMAT
from converter.exceptions import UnpaperError, OfficeConversionError
from common import TEMPORARY_DIRECTORY
from documents.utils import document_save_to_temp_dir
DEFAULT_ZOOM_LEVEL = 100
DEFAULT_ROTATION = 0
DEFAULT_PAGE_INDEX_NUMBER = 0
DEFAULT_FILE_FORMAT = u'jpg'
DEFAULT_OCR_FILE_FORMAT = u'tif'
QUALITY_DEFAULT = u'quality_default'
QUALITY_LOW = u'quality_low'
QUALITY_HIGH = u'quality_high'
QUALITY_PRINT = u'quality_print'
QUALITY_SETTINGS = {
QUALITY_DEFAULT: DEFAULT_OPTIONS,
QUALITY_LOW: LOW_QUALITY_OPTIONS,
QUALITY_HIGH: HIGH_QUALITY_OPTIONS,
QUALITY_PRINT: PRINT_QUALITY_OPTIONS
}
from converter import backend
from converter.literals import TRANSFORMATION_CHOICES
from converter.literals import TRANSFORMATION_RESIZE, \
TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \
TRANSFORMATION_ZOOM
from converter.literals import DIMENSION_SEPARATOR
from converter.utils import cleanup
HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest()
CONVERTER_OFFICE_FILE_EXTENSIONS = [
u'ods', u'docx', u'doc'
]
def _lazy_load(fn):
_cached = []
def _decorated():
if not _cached:
_cached.append(fn())
return _cached[0]
return _decorated
@_lazy_load
def _get_backend():
return import_module(GRAPHICS_BACKEND)
try:
backend = _get_backend()
except ImportError:
raise ImportError(u'Missing or incorrect converter backend: %s' % GRAPHICS_BACKEND)
def cleanup(filename):
"""
Tries to remove the given filename. Ignores non-existent files
"""
try:
os.remove(filename)
except OSError:
pass
def execute_unpaper(input_filepath, output_filepath):
"""
Executes the program unpaper using subprocess's Popen
"""
command = []
command.append(UNPAPER_PATH)
command.append(u'--overwrite')
command.append(input_filepath)
command.append(output_filepath)
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE)
return_code = proc.wait()
if return_code != 0:
raise UnpaperError(proc.stderr.readline())
def execute_unoconv(input_filepath, arguments=''):
"""
Executes the program unoconv using subprocess's Popen
@@ -109,19 +47,11 @@ def cache_cleanup(input_filepath, *args, **kwargs):
def create_image_cache_filename(input_filepath, *args, **kwargs):
if input_filepath:
temp_filename, separator = os.path.splitext(os.path.basename(input_filepath))
temp_path = os.path.join(TEMPORARY_DIRECTORY, temp_filename)
final_filepath = []
[final_filepath.append(str(arg)) for arg in args]
final_filepath.extend([u'%s_%s' % (key, value) for key, value in kwargs.items()])
temp_path += slugify(u'_'.join(final_filepath))
return temp_path
hash_value = HASH_FUNCTION(u''.join([input_filepath, unicode(args), unicode(kwargs)]))
return os.path.join(TEMPORARY_DIRECTORY, hash_value)
else:
return None
def convert_office_document(input_filepath):
if os.path.exists(UNOCONV_PATH):
@@ -130,27 +60,19 @@ def convert_office_document(input_filepath):
return None
def convert_document(document, *args, **kwargs):
document_filepath = create_image_cache_filename(document.checksum, *args, **kwargs)
if os.path.exists(document_filepath):
return document_filepath
return convert(document_save_to_temp_dir(document, document.checksum), *args, **kwargs)
def convert(input_filepath, *args, **kwargs):
def convert(input_filepath, output_filepath=None, cleanup_files=False, *args, **kwargs):
size = kwargs.get('size')
file_format = kwargs.get('file_format', DEFAULT_FILE_FORMAT)
extra_options = kwargs.get('extra_options', u'')
zoom = kwargs.get('zoom', DEFAULT_ZOOM_LEVEL)
rotation = kwargs.get('rotation', DEFAULT_ROTATION)
page = kwargs.get('page', DEFAULT_PAGE_INDEX_NUMBER)
cleanup_files = kwargs.get('cleanup_files', True)
quality = kwargs.get('quality', QUALITY_DEFAULT)
page = kwargs.get('page', DEFAULT_PAGE_NUMBER)
transformations = kwargs.get('transformations', [])
unoconv_output = None
output_filepath = create_image_cache_filename(input_filepath, *args, **kwargs)
if output_filepath is None:
output_filepath = create_image_cache_filename(input_filepath, *args, **kwargs)
if os.path.exists(output_filepath):
return output_filepath
@@ -160,20 +82,33 @@ def convert(input_filepath, *args, **kwargs):
if result:
unoconv_output = result
input_filepath = result
extra_options = u''
input_arg = u'%s[%s]' % (input_filepath, page)
extra_options += u' -resize %s' % size
if size:
transformations.append(
{
'transformation': TRANSFORMATION_RESIZE,
'arguments': dict(zip([u'width', u'height'], size.split(DIMENSION_SEPARATOR)))
}
)
if zoom != 100:
extra_options += u' -resize %d%% ' % zoom
transformations.append(
{
'transformation': TRANSFORMATION_ZOOM,
'arguments': {'percent': zoom}
}
)
if rotation != 0 and rotation != 360:
extra_options += u' -rotate %d ' % rotation
transformations.append(
{
'transformation': TRANSFORMATION_ROTATE,
'arguments': {'degrees': rotation}
}
)
if format == u'jpg':
extra_options += u' -quality 85'
try:
backend.execute_convert(input_filepath=input_arg, arguments=extra_options, output_filepath=u'%s:%s' % (file_format, output_filepath), quality=quality)
backend.convert_file(input_filepath=input_filepath, output_filepath=output_filepath, transformations=transformations, page=page, file_format=file_format)
finally:
if cleanup_files:
cleanup(input_filepath)
@@ -184,51 +119,22 @@ def convert(input_filepath, *args, **kwargs):
def get_page_count(input_filepath):
try:
return len(backend.execute_identify(unicode(input_filepath)).splitlines())
except:
#TODO: send to other page number identifying program
return 1
return backend.get_page_count(input_filepath)
def get_document_dimensions(document, *args, **kwargs):
document_filepath = create_image_cache_filename(document.checksum, *args, **kwargs)
if os.path.exists(document_filepath):
options = [u'-format', u'%w %h']
return [int(dimension) for dimension in backend.execute_identify(unicode(document_filepath), options).split()]
return [int(dimension) for dimension in backend.identify_file(unicode(document_filepath), options).split()]
else:
return [0, 0]
def convert_document_for_ocr(document, page=DEFAULT_PAGE_INDEX_NUMBER, file_format=DEFAULT_OCR_FILE_FORMAT):
#Extract document file
input_filepath = document_save_to_temp_dir(document, document.uuid)
#Convert for OCR
temp_filename, separator = os.path.splitext(os.path.basename(input_filepath))
temp_path = os.path.join(TEMPORARY_DIRECTORY, temp_filename)
transformation_output_file = u'%s_trans%s%s%s' % (temp_path, page, os.extsep, file_format)
unpaper_input_file = u'%s_unpaper_in%s%spnm' % (temp_path, page, os.extsep)
unpaper_output_file = u'%s_unpaper_out%s%spnm' % (temp_path, page, os.extsep)
convert_output_file = u'%s_ocr%s%s%s' % (temp_path, page, os.extsep, file_format)
input_arg = u'%s[%s]' % (input_filepath, page)
try:
document_page = document.documentpage_set.get(page_number=page + 1)
transformation_string, warnings = document_page.get_transformation_string()
#Apply default transformations
backend.execute_convert(input_filepath=input_arg, quality=QUALITY_HIGH, arguments=transformation_string, output_filepath=transformation_output_file)
#Do OCR operations
backend.execute_convert(input_filepath=transformation_output_file, arguments=OCR_OPTIONS, output_filepath=unpaper_input_file)
# Process by unpaper
execute_unpaper(input_filepath=unpaper_input_file, output_filepath=unpaper_output_file)
# Convert to tif
backend.execute_convert(input_filepath=unpaper_output_file, output_filepath=convert_output_file)
finally:
cleanup(transformation_output_file)
cleanup(unpaper_input_file)
cleanup(unpaper_output_file)
return convert_output_file
def get_available_transformations_choices():
    """
    Return a list of [transformation, description] pairs, one for every
    transformation reported by the active backend.

    The description is the transformation label followed by its argument
    names joined with commas; required arguments are shown as <name> and
    the others as [name].
    """
    choices = []
    for name in backend.get_available_transformations():
        definition = TRANSFORMATION_CHOICES[name]
        label = definition['label']

        placeholders = []
        for argument in definition['arguments']:
            if argument['required']:
                placeholders.append('<%s>' % argument['name'])
            else:
                placeholders.append('[%s]' % argument['name'])

        choices.append([name, u'%s %s' % (label, u','.join(placeholders))])

    return choices

View File

@@ -0,0 +1,18 @@
class ConverterBase(object):
    """
    Base class that all backend classes must inherit

    Every method here raises NotImplementedError naming the concrete
    class and the method it failed to override.
    """
    def _missing(self, method_name):
        # Build the exception shared by all the placeholder methods
        return NotImplementedError("Your %s class has not defined a %s() method, which is required." % (self.__class__.__name__, method_name))

    def convert_file(self, input_filepath, *args, **kwargs):
        raise self._missing('convert_file')

    def convert_document(self, document, *args, **kwargs):
        raise self._missing('convert_document')

    def get_format_list(self):
        raise self._missing('get_format_list')

    def get_available_transformations(self):
        raise self._missing('get_available_transformations')

    def get_page_count(self, input_filepath=None):
        # Accepts the file path the backends take, so that calling an
        # unimplemented backend reports NotImplementedError rather than a
        # TypeError about the argument count
        raise self._missing('get_page_count')

View File

@@ -1,71 +0,0 @@
import subprocess
import re
from converter.conf.settings import GM_PATH
from converter.conf.settings import GM_SETTINGS
from converter.api import QUALITY_DEFAULT, QUALITY_SETTINGS
from converter.exceptions import ConvertError, UnknownFormat, IdentifyError
CONVERTER_ERROR_STRING_NO_DECODER = u'No decode delegate for this image format'
CONVERTER_ERROR_STARTS_WITH = u'starts with'
def execute_identify(input_filepath, arguments=None):
command = []
command.append(unicode(GM_PATH))
command.append(u'identify')
if arguments:
command.extend(arguments)
command.append(unicode(input_filepath))
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return_code = proc.wait()
if return_code != 0:
raise IdentifyError(proc.stderr.readline())
return proc.stdout.read()
def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None):
command = []
command.append(unicode(GM_PATH))
command.append(u'convert')
command.extend(unicode(QUALITY_SETTINGS[quality]).split())
command.extend(unicode(GM_SETTINGS).split())
command.append(unicode(input_filepath))
if arguments:
command.extend(unicode(arguments).split())
command.append(unicode(output_filepath))
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return_code = proc.wait()
if return_code != 0:
#Got an error from convert program
error_line = proc.stderr.readline()
if (CONVERTER_ERROR_STRING_NO_DECODER in error_line) or (CONVERTER_ERROR_STARTS_WITH in error_line):
#Try to determine from error message which class of error is it
raise UnknownFormat
else:
raise ConvertError(error_line)
def get_format_list():
"""
Call GraphicsMagick to parse all of it's supported file formats, and
return a list of the names and descriptions
"""
format_regex = re.compile(' *([A-Z0-9]+)[*]? +([A-Z0-9]+) +([rw\-+]+) *(.*).*')
formats = []
command = []
command.append(unicode(GM_PATH))
command.append(u'convert')
command.append(u'-list')
command.append(u'formats')
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return_code = proc.wait()
if return_code != 0:
raise ConvertError(proc.stderr.readline())
for line in proc.stdout.readlines():
fields = format_regex.findall(line)
if fields:
formats.append((fields[0][0], fields[0][3]))
return formats

View File

@@ -0,0 +1,119 @@
import subprocess
import re
from converter.conf.settings import GM_PATH
from converter.conf.settings import GM_SETTINGS
from converter.exceptions import ConvertError, UnknownFormat, \
IdentifyError
from converter.backends import ConverterBase
from converter.literals import TRANSFORMATION_RESIZE, \
TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \
TRANSFORMATION_ZOOM
from converter.literals import DIMENSION_SEPARATOR, DEFAULT_PAGE_NUMBER, \
DEFAULT_FILE_FORMAT
CONVERTER_ERROR_STRING_NO_DECODER = u'No decode delegate for this image format'
CONVERTER_ERROR_STARTS_WITH = u'starts with'
class ConverterClass(ConverterBase):
    """
    Converter backend that runs the GraphicsMagick executable configured
    in GM_PATH as a subprocess.
    """
    def _execute(self, command):
        """
        Run command and return a (return_code, stdout, error_line) tuple,
        where error_line is the first line written to stderr.
        """
        proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
        # communicate() drains both pipes while waiting for the child;
        # wait() alone can block forever once a pipe buffer fills up
        stdout, stderr = proc.communicate()
        lines = stderr.splitlines(True)
        error_line = lines[0] if lines else stderr
        return proc.returncode, stdout, error_line

    def identify_file(self, input_filepath, arguments=None):
        """
        Run the identify sub command on input_filepath and return its
        standard output.  Raises IdentifyError on a non zero exit code.
        """
        command = [unicode(GM_PATH), u'identify']
        if arguments:
            command.extend(arguments)
        command.append(unicode(input_filepath))

        return_code, output, error_line = self._execute(command)
        if return_code != 0:
            raise IdentifyError(error_line)
        return output

    def convert_file(self, input_filepath, output_filepath, transformations=None, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT):
        """
        Convert one page of input_filepath into output_filepath using
        file_format, applying the given transformations in order.

        Raises UnknownFormat when the error output matches one of the
        known "unsupported format" messages and ConvertError for any
        other failure.
        """
        arguments = []
        if transformations:
            for transformation in transformations:
                if transformation['transformation'] == TRANSFORMATION_RESIZE:
                    # Height is optional; width alone is a valid geometry
                    dimensions = [unicode(transformation['arguments']['width'])]
                    if 'height' in transformation['arguments']:
                        dimensions.append(unicode(transformation['arguments']['height']))
                    arguments.append(u'-resize')
                    arguments.append(u'%s' % DIMENSION_SEPARATOR.join(dimensions))

                elif transformation['transformation'] == TRANSFORMATION_ZOOM:
                    arguments.append(u'-resize')
                    arguments.append(u'%d%%' % transformation['arguments']['percent'])

                elif transformation['transformation'] == TRANSFORMATION_ROTATE:
                    arguments.append(u'-rotate')
                    arguments.append(u'%s' % transformation['arguments']['degrees'])

        if file_format.lower() in (u'jpeg', u'jpg'):
            arguments.append(u'-quality')
            arguments.append(u'85')

        # Graphicsmagick page number is 0 base
        input_arg = u'%s[%d]' % (input_filepath, page - 1)

        # Specify the file format next to the output filename
        output_filepath = u'%s:%s' % (file_format, output_filepath)

        command = [unicode(GM_PATH), u'convert']
        command.extend(unicode(GM_SETTINGS).split())
        command.append(unicode(input_arg))
        if arguments:
            command.extend(arguments)
        command.append(unicode(output_filepath))

        return_code, output, error_line = self._execute(command)
        if return_code != 0:
            #Got an error from convert program
            if (CONVERTER_ERROR_STRING_NO_DECODER in error_line) or (CONVERTER_ERROR_STARTS_WITH in error_line):
                #Try to determine from error message which class of error is it
                raise UnknownFormat
            else:
                raise ConvertError(error_line)

    def get_format_list(self):
        """
        Call GraphicsMagick to parse all of it's supported file formats, and
        return a list of the names and descriptions
        """
        format_regex = re.compile(r' *([A-Z0-9]+)[*]? +([A-Z0-9]+) +([rw\-+]+) *(.*).*')
        formats = []

        command = [unicode(GM_PATH), u'convert', u'-list', u'formats']
        return_code, output, error_line = self._execute(command)
        if return_code != 0:
            raise ConvertError(error_line)

        for line in output.splitlines():
            fields = format_regex.findall(line)
            if fields:
                # First group is the format name, fourth its description
                formats.append((fields[0][0], fields[0][3]))

        return formats

    def get_available_transformations(self):
        """
        Return the transformations convert_file() knows how to apply.
        """
        return [
            TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE,
            TRANSFORMATION_ZOOM
        ]

    def get_page_count(self, input_filepath):
        """
        Return the number of lines identify prints for input_filepath, or
        1 when identify cannot be run or fails.
        """
        try:
            return len(self.identify_file(unicode(input_filepath)).splitlines())
        except Exception:
            #TODO: send to other page number identifying program
            return 1

View File

@@ -1,68 +0,0 @@
import subprocess
import re
from converter.conf.settings import IM_IDENTIFY_PATH
from converter.conf.settings import IM_CONVERT_PATH
from converter.api import QUALITY_DEFAULT, QUALITY_SETTINGS
from converter.exceptions import ConvertError, UnknownFormat, \
IdentifyError
CONVERTER_ERROR_STRING_NO_DECODER = u'no decode delegate for this image format'
def execute_identify(input_filepath, arguments=None):
command = []
command.append(unicode(IM_IDENTIFY_PATH))
if arguments:
command.extend(arguments)
command.append(unicode(input_filepath))
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return_code = proc.wait()
if return_code != 0:
raise IdentifyError(proc.stderr.readline())
return proc.stdout.read()
def execute_convert(input_filepath, output_filepath, quality=QUALITY_DEFAULT, arguments=None):
command = []
command.append(unicode(IM_CONVERT_PATH))
command.extend(unicode(QUALITY_SETTINGS[quality]).split())
command.append(unicode(input_filepath))
if arguments:
command.extend(unicode(arguments).split())
command.append(unicode(output_filepath))
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return_code = proc.wait()
if return_code != 0:
#Got an error from convert program
error_line = proc.stderr.readline()
if CONVERTER_ERROR_STRING_NO_DECODER in error_line:
#Try to determine from error message which class of error is it
raise UnknownFormat
else:
raise ConvertError(error_line)
def get_format_list():
"""
Call ImageMagick to parse all of it's supported file formats, and
return a list of the names and descriptions
"""
format_regex = re.compile(' *([A-Z0-9]+)[*]? +([A-Z0-9]+) +([rw\-+]+) *(.*).*')
formats = []
command = []
command.append(unicode(IM_CONVERT_PATH))
command.append(u'-list')
command.append(u'format')
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return_code = proc.wait()
if return_code != 0:
raise ConvertError(proc.stderr.readline())
for line in proc.stdout.readlines():
fields = format_regex.findall(line)
if fields:
formats.append((fields[0][0], fields[0][3]))
return formats

View File

@@ -0,0 +1,116 @@
import subprocess
import re
from converter.conf.settings import IM_IDENTIFY_PATH
from converter.conf.settings import IM_CONVERT_PATH
from converter.exceptions import ConvertError, UnknownFormat, \
IdentifyError
from converter.backends import ConverterBase
from converter.literals import TRANSFORMATION_RESIZE, \
TRANSFORMATION_ROTATE, TRANSFORMATION_DENSITY, \
TRANSFORMATION_ZOOM
from converter.literals import DIMENSION_SEPARATOR, DEFAULT_PAGE_NUMBER, \
DEFAULT_FILE_FORMAT
CONVERTER_ERROR_STRING_NO_DECODER = u'no decode delegate for this image format'
class ConverterClass(ConverterBase):
    """
    Converter backend that runs the ImageMagick executables configured in
    IM_CONVERT_PATH and IM_IDENTIFY_PATH as subprocesses.
    """
    def _execute(self, command):
        """
        Run command and return a (return_code, stdout, error_line) tuple,
        where error_line is the first line written to stderr.
        """
        proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
        # communicate() drains both pipes while waiting for the child;
        # wait() alone can block forever once a pipe buffer fills up
        stdout, stderr = proc.communicate()
        lines = stderr.splitlines(True)
        error_line = lines[0] if lines else stderr
        return proc.returncode, stdout, error_line

    def identify_file(self, input_filepath, arguments=None):
        """
        Run the identify program on input_filepath and return its
        standard output.  Raises IdentifyError on a non zero exit code.
        """
        command = [unicode(IM_IDENTIFY_PATH)]
        if arguments:
            command.extend(arguments)
        command.append(unicode(input_filepath))

        return_code, output, error_line = self._execute(command)
        if return_code != 0:
            raise IdentifyError(error_line)
        return output

    def convert_file(self, input_filepath, output_filepath, transformations=None, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT):
        """
        Convert one page of input_filepath into output_filepath using
        file_format, applying the given transformations in order.

        Raises UnknownFormat when the error output contains the "no
        decode delegate" message and ConvertError for any other failure.
        """
        arguments = []
        if transformations:
            for transformation in transformations:
                if transformation['transformation'] == TRANSFORMATION_RESIZE:
                    # Height is optional; width alone is a valid geometry
                    dimensions = [unicode(transformation['arguments']['width'])]
                    if 'height' in transformation['arguments']:
                        dimensions.append(unicode(transformation['arguments']['height']))
                    arguments.append(u'-resize')
                    arguments.append(u'%s' % DIMENSION_SEPARATOR.join(dimensions))

                elif transformation['transformation'] == TRANSFORMATION_ZOOM:
                    arguments.append(u'-resize')
                    arguments.append(u'%d%%' % transformation['arguments']['percent'])

                elif transformation['transformation'] == TRANSFORMATION_ROTATE:
                    arguments.append(u'-rotate')
                    arguments.append(u'%s' % transformation['arguments']['degrees'])

        if file_format.lower() in (u'jpeg', u'jpg'):
            arguments.append(u'-quality')
            arguments.append(u'85')

        # Imagemagick page number is 0 base
        input_arg = u'%s[%d]' % (input_filepath, page - 1)

        # Specify the file format next to the output filename
        output_filepath = u'%s:%s' % (file_format, output_filepath)

        command = [unicode(IM_CONVERT_PATH), unicode(input_arg)]
        if arguments:
            command.extend(arguments)
        command.append(unicode(output_filepath))

        return_code, output, error_line = self._execute(command)
        if return_code != 0:
            #Got an error from convert program
            if CONVERTER_ERROR_STRING_NO_DECODER in error_line:
                #Try to determine from error message which class of error is it
                raise UnknownFormat
            else:
                raise ConvertError(error_line)

    def get_format_list(self):
        """
        Call ImageMagick to parse all of it's supported file formats, and
        return a list of the names and descriptions
        """
        format_regex = re.compile(r' *([A-Z0-9]+)[*]? +([A-Z0-9]+) +([rw\-+]+) *(.*).*')
        formats = []

        command = [unicode(IM_CONVERT_PATH), u'-list', u'format']
        return_code, output, error_line = self._execute(command)
        if return_code != 0:
            raise ConvertError(error_line)

        for line in output.splitlines():
            fields = format_regex.findall(line)
            if fields:
                # First group is the format name, fourth its description
                formats.append((fields[0][0], fields[0][3]))

        return formats

    def get_available_transformations(self):
        """
        Return the transformations convert_file() knows how to apply.
        """
        return [
            TRANSFORMATION_RESIZE, TRANSFORMATION_ROTATE,
            TRANSFORMATION_ZOOM
        ]

    def get_page_count(self, input_filepath):
        """
        Return the number of lines identify prints for input_filepath, or
        1 when identify cannot be run or fails.
        """
        try:
            return len(self.identify_file(unicode(input_filepath)).splitlines())
        except Exception:
            #TODO: send to other page number identifying program
            return 1

View File

@@ -0,0 +1,3 @@
from PIL import Image
Image.init()

View File

@@ -0,0 +1,171 @@
import tempfile
import os
import slate
from PIL import Image
import ghostscript
from django.utils.translation import ugettext_lazy as _
from common.utils import get_mimetype
from converter.exceptions import ConvertError, UnknownFormat, IdentifyError
from converter.backends import ConverterBase
from converter.literals import TRANSFORMATION_RESIZE, \
TRANSFORMATION_ROTATE, TRANSFORMATION_ZOOM
from converter.literals import DEFAULT_PAGE_NUMBER, \
DEFAULT_FILE_FORMAT
from converter.utils import cleanup
class ConverterClass(ConverterBase):
def get_page_count(self, input_filepath):
page_count = 1
mimetype, encoding = get_mimetype(input_filepath)
if mimetype == 'application/pdf':
# If file is a PDF open it with slate to determine the page
# count
with open(input_filepath) as fd:
pages = slate.PDF(fd)
return len(pages)
try:
im = Image.open(input_filepath)
except IOError: #cannot identify image file
# Return a page count of 1, to atleast allow the document
# to be created
return 1
try:
while 1:
im.seek(im.tell()+1)
page_count += 1
# do something to im
except EOFError:
pass # end of sequence
return page_count
def convert_file(self, input_filepath, output_filepath, transformations=None, page=DEFAULT_PAGE_NUMBER, file_format=DEFAULT_FILE_FORMAT):
tmpfile = None
mimetype, encoding = get_mimetype(input_filepath)
if mimetype == 'application/pdf':
# If file is a PDF open it with ghostscript and convert it to
# TIFF
first_page_tmpl = '-dFirstPage=%d' % page
last_page_tmpl = '-dLastPage=%d' % page
fd, tmpfile = tempfile.mkstemp()
os.close(fd)
output_file_tmpl = '-sOutputFile=%s' % tmpfile
input_file_tmpl = '-f%s' % input_filepath
args = [
'gs', '-q', '-dQUIET', '-dSAFER', '-dBATCH',
'-dNOPAUSE', '-dNOPROMPT',
first_page_tmpl, last_page_tmpl,
'-sDEVICE=jpeg', '-dJPEGQ=75',
'-r150', output_file_tmpl,
input_file_tmpl,
'-c "60000000 setvmthreshold"', # use 30MB
'-dNOGC', # No garbage collection
'-dMaxBitmap=500000000',
'-dAlignToPixels=0',
'-dGridFitTT=0',
'-dTextAlphaBits=4',
'-dGraphicsAlphaBits=4',
]
ghostscript.Ghostscript(*args)
page = 1 # Don't execute the following while loop
input_filepath = tmpfile
try:
im = Image.open(input_filepath)
except Exception: # Python Imaging Library doesn't recognize it as an image
raise UnknownFormat
finally:
if tmpfile:
cleanup(tmpfile)
current_page = 0
try:
while current_page == page - 1:
im.seek(im.tell() + 1)
current_page += 1
# do something to im
except EOFError:
pass # end of sequence
if transformations:
aspect = 1.0 * im.size[0] / im.size[1]
for transformation in transformations:
if transformation['transformation'] == TRANSFORMATION_RESIZE:
width = int(transformation['arguments']['width'])
height = int(transformation['arguments'].get('height', 1.0 * width * aspect))
im = self.resize(im, (width, height))
elif transformation['transformation'] == TRANSFORMATION_ZOOM:
decimal_value = float(transformation['arguments']['percent']) / 100
im = im.transform((im.size[0] * decimal_value, im.size[1] * decimal_value), Image.EXTENT, (0, 0, im.size[0], im.size[1]))
elif transformation['transformation'] == TRANSFORMATION_ROTATE:
# PIL counter degress counter-clockwise, reverse them
im = im.rotate(360 - transformation['arguments']['degrees'])
if im.mode not in ('L', 'RGB'):
im = im.convert('RGB')
im.save(output_filepath, format=file_format)
def get_format_list(self):
    """
    Build the list of supported file types from PIL's ``Image.ID``
    registry, as ``(format_name, u'')`` tuples
    """
    return [(format_name, u'') for format_name in Image.ID]
def get_available_transformations(self):
    """
    Return the identifiers of the transformations reported as
    available: resize, rotate and zoom
    """
    return [
        TRANSFORMATION_RESIZE,
        TRANSFORMATION_ROTATE,
        TRANSFORMATION_ZOOM,
    ]
# From: http://united-coders.com/christian-harms/image-resizing-tips-general-and-for-python
def resize(self, img, box, fit=False, out=None):
    '''Downsample the image so that it fits the given bounding box.
    @param img: Image - an Image-object; thumbnail() is called directly on it
    @param box: tuple(x, y) - the bounding box of the result image
    @param fit: boolean - crop the image to fill the box
    @param out: file-like-object - save the image into the output stream
    @return: the resulting image when no out is given; when out is
    given the image is saved into it as JPEG and nothing is returned
    '''
    # First pass: shrink by a factor of 2, 4, 8... with the fast
    # NEAREST filter
    shrink = 1
    while img.size[0]/shrink > 2*box[0] and img.size[1]*2/shrink > 2*box[1]:
        shrink *= 2
    if shrink > 1:
        img.thumbnail((img.size[0]/shrink, img.size[1]/shrink), Image.NEAREST)

    if fit:
        # Work out a crop window centred on the image, then crop to it
        left = upper = 0
        right, lower = img.size
        width_ratio = 1.0 * right/box[0]
        height_ratio = 1.0 * lower/box[1]
        if height_ratio > width_ratio:
            middle = lower/2
            half_extent = box[1]*width_ratio/2
            upper = middle-half_extent
            lower = middle+half_extent
        else:
            middle = right/2
            half_extent = box[0]*height_ratio/2
            left = middle-half_extent
            right = middle+half_extent
        img = img.crop((left, upper, right, lower))

    # Second pass: final resize with the ANTIALIAS filter
    img.thumbnail(box, Image.ANTIALIAS)

    if not out:
        return img
    # An output stream was given: save into it instead of returning
    img.save(out, "JPEG", quality=75)

View File

@@ -9,15 +9,12 @@ register_settings(
settings=[
{'name': u'IM_CONVERT_PATH', 'global_name': u'CONVERTER_IM_CONVERT_PATH', 'default': u'/usr/bin/convert', 'description': _(u'File path to imagemagick\'s convert program.'), 'exists': True},
{'name': u'IM_IDENTIFY_PATH', 'global_name': u'CONVERTER_IM_IDENTIFY_PATH', 'default': u'/usr/bin/identify', 'description': _(u'File path to imagemagick\'s identify program.'), 'exists': True},
{'name': u'UNPAPER_PATH', 'global_name': u'CONVERTER_UNPAPER_PATH', 'default': u'/usr/bin/unpaper', 'description': _(u'File path to unpaper program.'), 'exists': True},
{'name': u'GM_PATH', 'global_name': u'CONVERTER_GM_PATH', 'default': u'/usr/bin/gm', 'description': _(u'File path to graphicsmagick\'s program.'), 'exists': True},
{'name': u'GM_SETTINGS', 'global_name': u'CONVERTER_GM_SETTINGS', 'default': u''},
{'name': u'GRAPHICS_BACKEND', 'global_name': u'CONVERTER_GRAPHICS_BACKEND', 'default': u'converter.backends.imagemagick', 'description': _(u'Graphics conversion backend to use. Options are: converter.backends.imagemagick and converter.backends.graphicsmagick.')},
{'name': u'GRAPHICS_BACKEND', 'global_name': u'CONVERTER_GRAPHICS_BACKEND', 'default': u'converter.backends.python', 'description': _(u'Graphics conversion backend to use. Options are: converter.backends.imagemagick, converter.backends.graphicsmagick and converter.backends.python.')},
{'name': u'UNOCONV_PATH', 'global_name': u'CONVERTER_UNOCONV_PATH', 'default': u'/usr/bin/unoconv', 'exists': True},
{'name': u'OCR_OPTIONS', 'global_name': u'CONVERTER_OCR_OPTIONS', 'default': u'-colorspace Gray -depth 8 -resample 200x200'},
{'name': u'DEFAULT_OPTIONS', 'global_name': u'CONVERTER_DEFAULT_OPTIONS', 'default': u''},
{'name': u'LOW_QUALITY_OPTIONS', 'global_name': u'CONVERTER_LOW_QUALITY_OPTIONS', 'default': u''},
{'name': u'HIGH_QUALITY_OPTIONS', 'global_name': u'CONVERTER_HIGH_QUALITY_OPTIONS', 'default': u'-density 400'},
{'name': u'PRINT_QUALITY_OPTIONS', 'global_name': u'CONVERTER_PRINT_QUALITY_OPTIONS', 'default': u'-density 500'},
#{'name': u'OCR_OPTIONS', 'global_name': u'CONVERTER_OCR_OPTIONS', 'default': u'-colorspace Gray -depth 8 -resample 200x200'},
#{'name': u'HIGH_QUALITY_OPTIONS', 'global_name': u'CONVERTER_HIGH_QUALITY_OPTIONS', 'default': u'-density 400'},
#{'name': u'PRINT_QUALITY_OPTIONS', 'global_name': u'CONVERTER_PRINT_QUALITY_OPTIONS', 'default': u'-density 500'},
]
)

View File

@@ -13,13 +13,6 @@ class UnknownFormat(ConvertError):
pass
class UnpaperError(ConvertError):
    """
    Conversion error raised by unpaper
    """
class IdentifyError(ConvertError):
"""
Raised by identify

View File

@@ -0,0 +1,46 @@
from django.utils.translation import ugettext_lazy as _
# Default values for image generation (zoom level, rotation, page number
# and output file format)
# NOTE(review): zoom is presumably a percentage and rotation degrees,
# matching the 'percent' and 'degrees' arguments below - confirm
DEFAULT_ZOOM_LEVEL = 100
DEFAULT_ROTATION = 0
DEFAULT_PAGE_NUMBER = 1
DEFAULT_FILE_FORMAT = u'jpeg'
# NOTE(review): presumably the separator of size strings such as
# u'640x480' - confirm against the code that parses sizes
DIMENSION_SEPARATOR = u'x'
# Identifiers of the known transformations; they are the keys of
# TRANSFORMATION_CHOICES
TRANSFORMATION_RESIZE = u'resize'
TRANSFORMATION_ROTATE = u'rotate'
TRANSFORMATION_DENSITY = u'density'
TRANSFORMATION_ZOOM = u'zoom'
# Description of every transformation, keyed by its identifier. Each
# entry holds a translatable 'label' and 'description' plus the list of
# 'arguments' it takes; every argument has a 'name', a translatable
# 'label' and a 'required' flag
TRANSFORMATION_CHOICES = {
TRANSFORMATION_RESIZE: {
'label': _(u'Resize'),
'description': _(u'Resize.'),
'arguments': [
{'name': 'width', 'label': _(u'width'), 'required': True},
{'name': 'height', 'label': _(u'height'), 'required': False},
]
},
TRANSFORMATION_ROTATE: {
'label': _(u'Rotate'),
'description': _(u'Rotate by n degress.'),
'arguments': [
{'name': 'degrees', 'label': _(u'degrees'), 'required': True}
]
},
TRANSFORMATION_DENSITY: {
'label': _(u'Density'),
'description': _(u'Change the resolution (ie: DPI) without resizing.'),
'arguments': [
{'name': 'width', 'label': _(u'width'), 'required': True},
{'name': 'height', 'label': _(u'height'), 'required': False},
]
},
TRANSFORMATION_ZOOM: {
'label': _(u'Zoom'),
'description': _(u'Zoom by n percent.'),
'arguments': [
{'name': 'percent', 'label': _(u'percent'), 'required': True}
]
},
}

View File

@@ -1,6 +1,10 @@
import os
from django.core.exceptions import ImproperlyConfigured
from django.utils.importlib import import_module
#http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python
def copyfile(source, dest, buffer_size=1024 * 1024):
"""
Copy a file from source to dest. source and dest
@@ -21,3 +25,60 @@ def copyfile(source, dest, buffer_size=1024 * 1024):
source.close()
dest.close()
def _lazy_load(fn):
_cached = []
def _decorated():
if not _cached:
_cached.append(fn())
return _cached[0]
return _decorated
@_lazy_load
def load_backend():
    """
    Import and return the ``base`` module of the configured converter
    backend.

    The backend name is read from
    ``converter.conf.settings.GRAPHICS_BACKEND``. It is first tried as a
    short name relative to ``converter.backends`` (a successful import
    this way emits a PendingDeprecationWarning), then as a fully
    qualified dotted path.

    Raises ImproperlyConfigured when both imports fail and the name is
    not one of the directories found under ``backends``; otherwise the
    ImportError of the second attempt is re-raised.
    """
    from converter.conf.settings import GRAPHICS_BACKEND as backend_name
    try:
        module = import_module('.base', 'converter.backends.%s' % backend_name)
        import warnings
        warnings.warn(
            "Short names for CONVERTER_GRAPHICS_BACKEND are deprecated; prepend with 'converter.backends.'",
            PendingDeprecationWarning
        )
        return module
    except ImportError:
        # Look for a fully qualified converter backend name
        try:
            return import_module('.base', backend_name)
        except ImportError as e_user:
            # The converter backend wasn't found. Display a helpful error message
            # listing all possible (built-in) converter backends.
            backend_dir = os.path.join(os.path.dirname(__file__), 'backends')
            try:
                available_backends = [f for f in os.listdir(backend_dir)
                    if os.path.isdir(os.path.join(backend_dir, f))
                    and not f.startswith('.')]
            except EnvironmentError:
                # The backends directory could not be listed
                available_backends = []
            available_backends.sort()
            if backend_name not in available_backends:
                error_msg = ("%r isn't an available converter backend. \n" +
                    "Try using converter.backends.XXX, where XXX is one of:\n    %s\n" +
                    "Error was: %s") % \
                    (backend_name, ", ".join(map(repr, available_backends)), e_user)
                raise ImproperlyConfigured(error_msg)
            else:
                raise  # If there's some other error, this must be an error in Mayan itself.
def cleanup(filename):
    """
    Delete the given filename on a best-effort basis: an OSError
    raised by the removal (for instance for a non-existent file) is
    ignored
    """
    try:
        os.unlink(filename)
    except OSError:
        return

View File

@@ -1,38 +1,18 @@
from django.utils.translation import ugettext_lazy as _
from django.shortcuts import render_to_response
from django.template import RequestContext
from django.utils.importlib import import_module
from converter import backend
from converter.conf.settings import GRAPHICS_BACKEND
def _lazy_load(fn):
_cached = []
def _decorated():
if not _cached:
_cached.append(fn())
return _cached[0]
return _decorated
@_lazy_load
def _get_backend():
    """
    Import the module named by the GRAPHICS_BACKEND setting
    """
    backend_module = import_module(GRAPHICS_BACKEND)
    return backend_module


# Resolve the backend when this module is imported, and report a
# failure with the name of the configured backend
try:
    backend = _get_backend()
except ImportError:
    raise ImportError(u'Missing or incorrect converter backend: %s' % GRAPHICS_BACKEND)
def formats_list(request):
#check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW])
context = {
'title': _(u'suported file formats'),
'hide_object': True,
'object_list': backend.get_format_list(),
'object_list': sorted(backend.get_format_list()),
'extra_columns': [
{
'name': _(u'name'),

View File

@@ -2,6 +2,7 @@ from django.utils.translation import ugettext_lazy as _
from django.core.urlresolvers import reverse
from django.conf import settings
from common.utils import validate_path
from navigation.api import register_links, register_top_menu, \
register_model_list_columns, register_multi_item_links, \
register_sidebar_template
@@ -13,9 +14,6 @@ from metadata.api import get_metadata_string
from documents.models import Document, DocumentPage, \
DocumentPageTransformation, DocumentType, DocumentTypeFilename
from documents.staging import StagingFile
from documents.conf.settings import USE_STAGING_DIRECTORY
from documents.conf.settings import PER_USER_STAGING_DIRECTORY
from documents.literals import PERMISSION_DOCUMENT_CREATE, \
PERMISSION_DOCUMENT_PROPERTIES_EDIT, PERMISSION_DOCUMENT_VIEW, \
PERMISSION_DOCUMENT_DELETE, PERMISSION_DOCUMENT_DOWNLOAD, \
@@ -27,30 +25,9 @@ from documents.literals import HISTORY_DOCUMENT_CREATED, \
HISTORY_DOCUMENT_EDITED, HISTORY_DOCUMENT_DELETED
from documents.conf.settings import ZOOM_MAX_LEVEL
from documents.conf.settings import ZOOM_MIN_LEVEL
from documents.conf.settings import CACHE_PATH
from documents.widgets import document_thumbnail
# Permission setup
set_namespace_title('documents', _(u'Documents'))
register_permission(PERMISSION_DOCUMENT_CREATE)
register_permission(PERMISSION_DOCUMENT_PROPERTIES_EDIT)
register_permission(PERMISSION_DOCUMENT_EDIT)
register_permission(PERMISSION_DOCUMENT_VIEW)
register_permission(PERMISSION_DOCUMENT_DELETE)
register_permission(PERMISSION_DOCUMENT_DOWNLOAD)
register_permission(PERMISSION_DOCUMENT_TRANSFORM)
register_permission(PERMISSION_DOCUMENT_TOOLS)
# Document type permissions
register_permission(PERMISSION_DOCUMENT_TYPE_EDIT)
register_permission(PERMISSION_DOCUMENT_TYPE_DELETE)
register_permission(PERMISSION_DOCUMENT_TYPE_CREATE)
# History setup
register_history_type(HISTORY_DOCUMENT_CREATED)
register_history_type(HISTORY_DOCUMENT_EDITED)
register_history_type(HISTORY_DOCUMENT_DELETED)
# Document page links expressions
def is_first_page(context):
    """
    Tell whether the object stored under context['object'] has a
    page_number of 1 or lower
    """
    current_page = context['object']
    return current_page.page_number <= 1
@@ -67,6 +44,28 @@ def is_min_zoom(context):
def is_max_zoom(context):
    """
    Tell whether the zoom value stored under context['zoom'] has
    reached ZOOM_MAX_LEVEL
    """
    current_zoom = context['zoom']
    return current_zoom >= ZOOM_MAX_LEVEL
# Permission setup
set_namespace_title('documents', _(u'Documents'))
register_permission(PERMISSION_DOCUMENT_CREATE)
register_permission(PERMISSION_DOCUMENT_PROPERTIES_EDIT)
register_permission(PERMISSION_DOCUMENT_EDIT)
register_permission(PERMISSION_DOCUMENT_VIEW)
register_permission(PERMISSION_DOCUMENT_DELETE)
register_permission(PERMISSION_DOCUMENT_DOWNLOAD)
register_permission(PERMISSION_DOCUMENT_TRANSFORM)
register_permission(PERMISSION_DOCUMENT_TOOLS)
# Document type permissions
set_namespace_title('documents_setup', _(u'Documents setup'))
register_permission(PERMISSION_DOCUMENT_TYPE_EDIT)
register_permission(PERMISSION_DOCUMENT_TYPE_DELETE)
register_permission(PERMISSION_DOCUMENT_TYPE_CREATE)
# History setup
register_history_type(HISTORY_DOCUMENT_CREATED)
register_history_type(HISTORY_DOCUMENT_EDITED)
register_history_type(HISTORY_DOCUMENT_DELETED)
document_list = {'text': _(u'all documents'), 'view': 'document_list', 'famfam': 'page', 'permissions': [PERMISSION_DOCUMENT_VIEW]}
document_list_recent = {'text': _(u'recent documents'), 'view': 'document_list_recent', 'famfam': 'page', 'permissions': [PERMISSION_DOCUMENT_VIEW]}
document_create_multiple = {'text': _(u'upload new documents'), 'view': 'document_create_multiple', 'famfam': 'page_add', 'permissions': [PERMISSION_DOCUMENT_CREATE]}
@@ -107,13 +106,6 @@ document_page_rotate_left = {'text': _(u'rotate left'), 'class': 'no-parent-hist
document_missing_list = {'text': _(u'Find missing document files'), 'view': 'document_missing_list', 'famfam': 'folder_page', 'permissions': [PERMISSION_DOCUMENT_VIEW]}
upload_document_from_local = {'text': _(u'local'), 'view': 'upload_document_from_local', 'famfam': 'drive_disk', 'keep_query': True}
upload_document_from_staging = {'text': _(u'staging'), 'view': 'upload_document_from_staging', 'famfam': 'drive_network', 'keep_query': True, 'condition': lambda x: USE_STAGING_DIRECTORY}
upload_document_from_user_staging = {'text': _(u'user staging'), 'view': 'upload_document_from_user_staging', 'famfam': 'drive_user', 'keep_query': True, 'condition': lambda x: PER_USER_STAGING_DIRECTORY}
staging_file_preview = {'text': _(u'preview'), 'class': 'fancybox-noscaling', 'view': 'staging_file_preview', 'args': ['source', 'object.id'], 'famfam': 'drive_magnify'}
staging_file_delete = {'text': _(u'delete'), 'view': 'staging_file_delete', 'args': ['source', 'object.id'], 'famfam': 'drive_delete'}
# Document type related links
document_type_list = {'text': _(u'document type list'), 'view': 'document_type_list', 'famfam': 'layout', 'permissions': [PERMISSION_DOCUMENT_VIEW]}
document_type_document_list = {'text': _(u'documents of this type'), 'view': 'document_type_document_list', 'args': 'object.id', 'famfam': 'page_go', 'permissions': [PERMISSION_DOCUMENT_VIEW]}
@@ -139,9 +131,12 @@ register_links(['document_type_filename_edit', 'document_type_filename_delete'],
# Register document links
register_links(Document, [document_edit, document_print, document_delete, document_download, document_find_duplicates, document_clear_transformations, document_create_siblings])
register_multi_item_links(['folder_view', 'index_instance_list', 'document_type_document_list', 'search', 'results', 'document_group_view', 'document_list', 'document_list_recent'], [document_multiple_clear_transformations, document_multiple_delete])
register_multi_item_links(['document_find_duplicates', 'folder_view', 'index_instance_list', 'document_type_document_list', 'search', 'results', 'document_group_view', 'document_list', 'document_list_recent'], [document_multiple_clear_transformations, document_multiple_delete])
register_links(['document_list_recent', 'document_list', 'document_create', 'document_create_multiple', 'upload_document', 'upload_document_from_local', 'upload_document_from_staging', 'upload_document_from_user_staging', 'document_find_duplicates'], [document_list_recent, document_list, document_create_multiple], menu_name='secondary_menu')
secondary_menu_links = [document_list_recent, document_list, document_create_multiple]
register_links(['document_list_recent', 'document_list', 'document_create', 'document_create_multiple', 'upload_interactive', 'staging_file_delete'], secondary_menu_links, menu_name='secondary_menu')
#register_links(Document, secondary_menu_links, menu_name='sidebar')
# Document page links
register_links(DocumentPage, [
@@ -157,17 +152,12 @@ register_links(DocumentPage, [
register_links(['document_page_view'], [document_page_rotate_left, document_page_rotate_right, document_page_zoom_in, document_page_zoom_out], menu_name='form_header')
# Upload sources
register_links(['upload_document_from_local', 'upload_document_from_staging', 'upload_document_from_user_staging'], [upload_document_from_local, upload_document_from_staging, upload_document_from_user_staging], menu_name='form_header')
register_links(DocumentPageTransformation, [document_page_transformation_edit, document_page_transformation_delete])
register_links(DocumentPageTransformation, [document_page_transformation_page_edit, document_page_transformation_page_view], menu_name='sidebar')
register_links('document_page_transformation_list', [document_page_transformation_create], menu_name='sidebar')
register_links('document_page_transformation_create', [document_page_transformation_create], menu_name='sidebar')
register_links(['document_page_transformation_edit', 'document_page_transformation_delete'], [document_page_transformation_page_transformation_list], menu_name='sidebar')
register_links(StagingFile, [staging_file_preview, staging_file_delete])
register_diagnostic('documents', _(u'Documents'), document_missing_list)
register_tool(document_find_all_duplicates, namespace='documents', title=_(u'documents'))
@@ -209,3 +199,5 @@ register_sidebar_template(['document_type_list'], 'document_types_help.html')
register_links(Document, [document_view_simple], menu_name='form_header', position=0)
register_links(Document, [document_view_advanced], menu_name='form_header', position=1)
register_links(Document, [document_history_view], menu_name='form_header')
validate_path(CACHE_PATH)

View File

@@ -2,8 +2,10 @@
import hashlib
import uuid
import os
from django.utils.translation import ugettext_lazy as _
from django.conf import settings
from storage.backends.filebasedstorage import FileBasedStorage
from smart_settings.api import register_settings
@@ -18,30 +20,15 @@ def default_uuid():
"""unicode(uuid.uuid4())"""
return unicode(uuid.uuid4())
available_transformations = {
'rotate': {'label': _(u'Rotate [degrees]'), 'arguments': [{'name': 'degrees'}]}
}
register_settings(
namespace=u'documents',
module=u'documents.conf.settings',
settings=[
# Upload
{'name': u'USE_STAGING_DIRECTORY', 'global_name': u'DOCUMENTS_USE_STAGING_DIRECTORY', 'default': False},
{'name': u'STAGING_DIRECTORY', 'global_name': u'DOCUMENTS_STAGING_DIRECTORY', 'default': u'/tmp/mayan/staging', 'exists': True},
{'name': u'PER_USER_STAGING_DIRECTORY', 'global_name': u'DOCUMENTS_PER_USER_STAGING_DIRECTORY', 'default': False},
{'name': u'USER_STAGING_DIRECTORY_ROOT', 'global_name': u'DOCUMENTS_USER_STAGING_DIRECTORY_ROOT', 'default': u'/tmp/mayan/staging/users', 'exists': True},
{'name': u'USER_STAGING_DIRECTORY_EXPRESSION', 'global_name': u'DOCUMENTS_USER_STAGING_DIRECTORY_EXPRESSION', 'default': u'user.username'},
{'name': u'DELETE_STAGING_FILE_AFTER_UPLOAD', 'global_name': u'DOCUMENTS_DELETE_STAGING_FILE_AFTER_UPLOAD', 'default': False},
{'name': u'STAGING_FILES_PREVIEW_SIZE', 'global_name': u'DOCUMENTS_STAGING_FILES_PREVIEW_SIZE', 'default': u'640x480'},
# Saving
{'name': u'CHECKSUM_FUNCTION', 'global_name': u'DOCUMENTS_CHECKSUM_FUNCTION', 'default': default_checksum},
{'name': u'UUID_FUNCTION', 'global_name': u'DOCUMENTS_UUID_FUNCTION', 'default': default_uuid},
# Storage
{'name': u'STORAGE_BACKEND', 'global_name': u'DOCUMENTS_STORAGE_BACKEND', 'default': FileBasedStorage},
# Transformations
{'name': u'AVAILABLE_TRANSFORMATIONS', 'global_name': u'DOCUMENTS_AVAILABLE_TRANSFORMATIONS', 'default': available_transformations},
{'name': u'DEFAULT_TRANSFORMATIONS', 'global_name': u'DOCUMENTS_DEFAULT_TRANSFORMATIONS', 'default': []},
# Usage
{'name': u'PREVIEW_SIZE', 'global_name': u'DOCUMENTS_PREVIEW_SIZE', 'default': u'640x480'},
{'name': u'PRINT_SIZE', 'global_name': u'DOCUMENTS_PRINT_SIZE', 'default': u'1400'},
@@ -53,5 +40,7 @@ register_settings(
{'name': u'ZOOM_MAX_LEVEL', 'global_name': u'DOCUMENTS_ZOOM_MAX_LEVEL', 'default': 200, 'description': _(u'Maximum amount in percent (%) to allow user to zoom in a document page interactively.')},
{'name': u'ZOOM_MIN_LEVEL', 'global_name': u'DOCUMENTS_ZOOM_MIN_LEVEL', 'default': 50, 'description': _(u'Minimum amount in percent (%) to allow user to zoom out a document page interactively.')},
{'name': u'ROTATION_STEP', 'global_name': u'DOCUMENTS_ROTATION_STEP', 'default': 90, 'description': _(u'Amount in degrees to rotate a document page per user interaction.')},
#
{'name': u'CACHE_PATH', 'global_name': u'DOCUMENTS_CACHE_PATH', 'default': os.path.join(settings.PROJECT_ROOT, 'image_cache'), 'exists': True},
]
)

View File

@@ -186,21 +186,11 @@ class DocumentForm(forms.ModelForm):
queryset=filenames_qs,
required=False,
label=_(u'Quick document rename'))
# Put the expand field last in the field order list
expand_field_index = self.fields.keyOrder.index('expand')
expand_field = self.fields.keyOrder.pop(expand_field_index)
self.fields.keyOrder.append(expand_field)
new_filename = forms.CharField(
label=_('New document filename'), required=False
)
expand = forms.BooleanField(
label=_(u'Expand compressed files'), required=False,
help_text=ugettext(u'Upload a compressed file\'s contained files as individual documents')
)
class DocumentForm_edit(DocumentForm):
"""
@@ -208,12 +198,7 @@ class DocumentForm_edit(DocumentForm):
"""
class Meta:
model = Document
exclude = ('file', 'document_type', 'tags', 'expand')
def __init__(self, *args, **kwargs):
super(DocumentForm_edit, self).__init__(*args, **kwargs)
self.fields.pop('expand')
exclude = ('file', 'document_type', 'tags')
class DocumentPropertiesForm(DetailForm):
@@ -266,32 +251,6 @@ class PrintForm(forms.Form):
page_range = forms.CharField(label=_(u'Page range'), required=False)
class StagingDocumentForm(DocumentForm):
    """
    Form that shows all the files in the staging folder specified by the
    StagingFile class passed as the 'cls' keyword argument
    """
    def __init__(self, *args, **kwargs):
        # 'cls' is removed from kwargs before they reach the parent form
        cls = kwargs.pop('cls')
        super(StagingDocumentForm, self).__init__(*args, **kwargs)
        try:
            self.fields['staging_file_id'].choices = [
                (staging_file.id, staging_file) for staging_file in cls.get_all()
            ]
        except Exception:
            # Best effort: if the staging files cannot be listed the
            # choices are left as they were. Only Exception is caught so
            # that SystemExit and KeyboardInterrupt are not swallowed
            pass

        # Put staging_list field first in the field order list
        staging_list_index = self.fields.keyOrder.index('staging_file_id')
        staging_list = self.fields.keyOrder.pop(staging_list_index)
        self.fields.keyOrder.insert(0, staging_list)

    # The choices of this field are filled in by __init__
    staging_file_id = forms.ChoiceField(label=_(u'Staging file'))

    class Meta(DocumentForm.Meta):
        exclude = ('description', 'file', 'document_type', 'tags')
class DocumentTypeForm(forms.ModelForm):
"""
Model class form to create or edit a document type

View File

@@ -14,13 +14,9 @@ PERMISSION_DOCUMENT_DOWNLOAD = {'namespace': 'documents', 'name': 'document_down
PERMISSION_DOCUMENT_TRANSFORM = {'namespace': 'documents', 'name': 'document_transform', 'label': _(u'Transform documents')}
PERMISSION_DOCUMENT_TOOLS = {'namespace': 'documents', 'name': 'document_tools', 'label': _(u'Execute document modifying tools')}
PERMISSION_DOCUMENT_TYPE_EDIT = {'namespace': 'documents', 'name': 'document_type_edit', 'label': _(u'Edit document types')}
PERMISSION_DOCUMENT_TYPE_DELETE = {'namespace': 'documents', 'name': 'document_type_delete', 'label': _(u'Delete document types')}
PERMISSION_DOCUMENT_TYPE_CREATE = {'namespace': 'documents', 'name': 'document_type_create', 'label': _(u'Create document types')}
UPLOAD_SOURCE_LOCAL = u'local'
UPLOAD_SOURCE_STAGING = u'staging'
UPLOAD_SOURCE_USER_STAGING = u'user_staging'
PERMISSION_DOCUMENT_TYPE_EDIT = {'namespace': 'documents_setup', 'name': 'document_type_edit', 'label': _(u'Edit document types')}
PERMISSION_DOCUMENT_TYPE_DELETE = {'namespace': 'documents_setup', 'name': 'document_type_delete', 'label': _(u'Delete document types')}
PERMISSION_DOCUMENT_TYPE_CREATE = {'namespace': 'documents_setup', 'name': 'document_type_create', 'label': _(u'Create document types')}
HISTORY_DOCUMENT_CREATED = {
'namespace': 'documents', 'name': 'document_created',

View File

@@ -13,3 +13,24 @@ class RecentDocumentManager(models.Manager):
to_delete = self.model.objects.filter(user=user)[RECENT_COUNT:]
for recent_to_delete in to_delete:
recent_to_delete.delete()
class DocumentPageTransformationManager(models.Manager):
    """
    Manager with helpers to fetch the transformations of a document page
    """
    def get_for_document_page(self, document_page):
        """
        Return the queryset of the transformations whose document_page
        is the given one
        """
        queryset = self.model.objects.filter(document_page=document_page)
        return queryset

    def get_for_document_page_as_list(self, document_page):
        """
        Return a (transformations, warnings) tuple for the given page.

        transformations is a list of dictionaries with the keys
        'transformation' and 'arguments', the latter being the evaluated
        form of the stored arguments text. An entry whose arguments fail
        to evaluate is left out and the exception is added to warnings.
        """
        parsed = []
        errors = []
        rows = self.get_for_document_page(document_page).values('transformation', 'arguments')
        for row in rows:
            try:
                # NOTE(review): eval() runs the stored arguments text as a
                # Python expression; the empty globals dictionary does not
                # remove access to the builtins. Consider a literal-only
                # parser if this text can come from untrusted users
                entry = {
                    'transformation': row['transformation'],
                    'arguments': eval(row['arguments'], {}),
                }
            except Exception as error:
                errors.append(error)
            else:
                parsed.append(entry)

        return parsed, errors

View File

@@ -1,26 +1,39 @@
import os
import tempfile
import hashlib
from django.db import models
from django.utils.translation import ugettext_lazy as _
from django.contrib.auth.models import User
from django.contrib.contenttypes import generic
from django.contrib.comments.models import Comment
from django.conf import settings
from taggit.managers import TaggableManager
from dynamic_search.api import register
from converter.api import get_page_count
from converter import TRANFORMATION_CHOICES
from converter.api import get_available_transformations_choices
from converter.api import create_image_cache_filename, convert
from converter.exceptions import UnknownFormat, UnkownConvertError
from documents.utils import get_document_mimetype
from documents.conf.settings import CHECKSUM_FUNCTION
from documents.conf.settings import UUID_FUNCTION
from documents.conf.settings import STORAGE_BACKEND
from documents.conf.settings import AVAILABLE_TRANSFORMATIONS
from documents.conf.settings import DEFAULT_TRANSFORMATIONS
from documents.managers import RecentDocumentManager
from documents.conf.settings import PREVIEW_SIZE
from documents.conf.settings import THUMBNAIL_SIZE
from documents.conf.settings import CACHE_PATH
available_transformations = ([(name, data['label']) for name, data in AVAILABLE_TRANSFORMATIONS.items()])
from documents.managers import RecentDocumentManager, \
DocumentPageTransformationManager
from documents.utils import document_save_to_temp_dir
from documents.literals import PICTURE_ERROR_SMALL, PICTURE_ERROR_MEDIUM, \
PICTURE_UNKNOWN_SMALL, PICTURE_UNKNOWN_MEDIUM
from converter.literals import DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \
DEFAULT_FILE_FORMAT, DEFAULT_PAGE_NUMBER
# document image cache name hash function
HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest()
def get_filename_from_uuid(instance, filename):
@@ -92,7 +105,7 @@ class Document(models.Model):
mimetype, page count and transformation when originally created
"""
new_document = not self.pk
transformations = kwargs.pop('transformations', None)
super(Document, self).save(*args, **kwargs)
if new_document:
@@ -101,7 +114,8 @@ class Document(models.Model):
self.update_mimetype(save=False)
self.save()
self.update_page_count(save=False)
self.apply_default_transformations()
if transformations:
self.apply_default_transformations(transformations)
@models.permalink
def get_absolute_url(self):
@@ -195,21 +209,43 @@ class Document(models.Model):
exists in storage
"""
return self.file.storage.exists(self.file.path)
def apply_default_transformations(self):
def apply_default_transformations(self, transformations):
#Only apply default transformations on new documents
if DEFAULT_TRANSFORMATIONS and reduce(lambda x, y: x + y, [page.documentpagetransformation_set.count() for page in self.documentpage_set.all()]) == 0:
for transformation in DEFAULT_TRANSFORMATIONS:
if 'name' in transformation:
for document_page in self.documentpage_set.all():
page_transformation = DocumentPageTransformation(
document_page=document_page,
order=0,
transformation=transformation['name'])
if 'arguments' in transformation:
page_transformation.arguments = transformation['arguments']
if reduce(lambda x, y: x + y, [page.documentpagetransformation_set.count() for page in self.documentpage_set.all()]) == 0:
for transformation in transformations:
for document_page in self.documentpage_set.all():
page_transformation = DocumentPageTransformation(
document_page=document_page,
order=0,
transformation=transformation.get('transformation'),
arguments=transformation.get('arguments')
)
page_transformation.save()
page_transformation.save()
def get_image_cache_name(self, page):
    """
    Return the cache file path for the given page number when that
    file already exists; otherwise save the document to a temporary
    file and return the result of convert() writing to that path.

    The cache file lives under CACHE_PATH and is named after the
    HASH_FUNCTION digest of the document checksum, the page number and
    the page's transformation list.
    """
    document_page = self.documentpage_set.get(page_number=page)
    transformations, unused_warnings = document_page.get_transformation_list()
    cache_key = u''.join([self.checksum, unicode(page), unicode(transformations)])
    cache_file_path = os.path.join(CACHE_PATH, HASH_FUNCTION(cache_key))
    if not os.path.exists(cache_file_path):
        document_file = document_save_to_temp_dir(self, self.checksum)
        return convert(document_file, output_filepath=cache_file_path, page=page, transformations=transformations)
    return cache_file_path
def get_image(self, size=PREVIEW_SIZE, page=DEFAULT_PAGE_NUMBER, zoom=DEFAULT_ZOOM_LEVEL, rotation=DEFAULT_ROTATION):
    """
    Return the result of convert() for the cached image of the given
    page, using the requested size, zoom and rotation.

    When an exception is raised, the path of a placeholder picture under
    MEDIA_ROOT/images is returned instead: the "unknown" picture for
    UnknownFormat and the "error" picture for any other exception.
    """
    try:
        image_cache_name = self.get_image_cache_name(page=page)
        return convert(image_cache_name, cleanup_files=False, size=size, zoom=zoom, rotation=rotation)
    except UnknownFormat:
        placeholder = PICTURE_UNKNOWN_SMALL
    except UnkownConvertError:
        placeholder = PICTURE_ERROR_SMALL
    except Exception:
        placeholder = PICTURE_ERROR_SMALL
    return os.path.join(settings.MEDIA_ROOT, u'images', placeholder)
class DocumentTypeFilename(models.Model):
@@ -251,26 +287,13 @@ class DocumentPage(models.Model):
verbose_name = _(u'document page')
verbose_name_plural = _(u'document pages')
def get_transformation_list(self):
    """
    Return what the DocumentPageTransformation manager's
    get_for_document_page_as_list() gives for this page
    """
    manager = DocumentPageTransformation.objects
    return manager.get_for_document_page_as_list(self)
@models.permalink
def get_absolute_url(self):
    """
    Return the view name and arguments for this page: the
    'document_page_view' view with the primary key as argument
    """
    view_name = 'document_page_view'
    return (view_name, [self.pk])
def get_transformation_string(self):
    """
    Return a (string, warnings) tuple: the space separated rendering of
    this page's transformations found in TRANFORMATION_CHOICES, and the
    list of exceptions raised while rendering them
    """
    transformation_list = []
    warnings = []
    for page_transformation in self.documentpagetransformation_set.all():
        try:
            # Transformations missing from TRANFORMATION_CHOICES are skipped
            if page_transformation.transformation not in TRANFORMATION_CHOICES:
                continue
            # NOTE(review): eval() runs the stored arguments text as a
            # Python expression with access to this scope; flagged for
            # review if the text can come from untrusted users
            transformation_list.append(
                TRANFORMATION_CHOICES[page_transformation.transformation] % eval(
                    page_transformation.arguments
                )
            )
        except Exception as e:
            warnings.append(e)

    return u' '.join(transformation_list), warnings
class DocumentPageTransformation(models.Model):
"""
@@ -279,9 +302,11 @@ class DocumentPageTransformation(models.Model):
"""
document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page'))
order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_(u'order'), db_index=True)
transformation = models.CharField(choices=available_transformations, max_length=128, verbose_name=_(u'transformation'))
transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_(u'transformation'))
arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: {\'degrees\':90}'))
objects = DocumentPageTransformationManager()
def __unicode__(self):
    """
    Return the display name of the transformation, in double quotes,
    followed by the unicode form of the document page
    """
    transformation_label = self.get_transformation_display()
    page_label = unicode(self.document_page)
    return u'"%s" for %s' % (transformation_label, page_label)

View File

@@ -1,6 +1,6 @@
from django.template import Library, Node, Variable
from converter.api import get_document_dimensions, QUALITY_PRINT
from converter.api import get_document_dimensions
from documents.views import calculate_converter_arguments
from documents.conf.settings import PRINT_SIZE
@@ -14,8 +14,7 @@ class GetImageSizeNode(Node):
def render(self, context):
document = Variable(self.document).resolve(context)
arguments, warnings = calculate_converter_arguments(document, size=PRINT_SIZE, quality=QUALITY_PRINT)
width, height = get_document_dimensions(document, **arguments)
width, height = get_document_dimensions(document)
context[u'document_width'], context['document_height'] = width, height
context[u'document_aspect'] = float(width) / float(height)
return u''

View File

@@ -1,24 +1,16 @@
from django.conf.urls.defaults import patterns, url
from converter.api import QUALITY_HIGH, QUALITY_PRINT
from documents.conf.settings import PREVIEW_SIZE
from documents.conf.settings import PRINT_SIZE
from documents.conf.settings import THUMBNAIL_SIZE
from documents.conf.settings import DISPLAY_SIZE
from documents.conf.settings import MULTIPAGE_PREVIEW_SIZE
from documents.literals import UPLOAD_SOURCE_LOCAL, \
UPLOAD_SOURCE_STAGING, UPLOAD_SOURCE_USER_STAGING
urlpatterns = patterns('documents.views',
url(r'^list/$', 'document_list', (), 'document_list'),
url(r'^list/recent/$', 'document_list_recent', (), 'document_list_recent'),
url(r'^create/from/local/multiple/$', 'document_create', (), 'document_create_multiple'),
url(r'^upload/local/$', 'upload_document_with_type', {'source': UPLOAD_SOURCE_LOCAL}, 'upload_document_from_local'),
url(r'^upload/staging/$', 'upload_document_with_type', {'source': UPLOAD_SOURCE_STAGING}, 'upload_document_from_staging'),
url(r'^upload/staging/user/$', 'upload_document_with_type', {'source': UPLOAD_SOURCE_USER_STAGING}, 'upload_document_from_user_staging'),
url(r'^(?P<document_id>\d+)/view/$', 'document_view', (), 'document_view_simple'),
url(r'^(?P<document_id>\d+)/view/advanced/$', 'document_view', {'advanced': True}, 'document_view_advanced'),
url(r'^(?P<document_id>\d+)/delete/$', 'document_delete', (), 'document_delete'),
@@ -30,8 +22,8 @@ urlpatterns = patterns('documents.views',
url(r'^(?P<document_id>\d+)/display/preview/$', 'get_document_image', {'size': PREVIEW_SIZE}, 'document_preview'),
url(r'^(?P<document_id>\d+)/display/preview/multipage/$', 'get_document_image', {'size': MULTIPAGE_PREVIEW_SIZE}, 'document_preview_multipage'),
url(r'^(?P<document_id>\d+)/display/thumbnail/$', 'get_document_image', {'size': THUMBNAIL_SIZE}, 'document_thumbnail'),
url(r'^(?P<document_id>\d+)/display/$', 'get_document_image', {'size': DISPLAY_SIZE, 'quality': QUALITY_HIGH}, 'document_display'),
url(r'^(?P<document_id>\d+)/display/print/$', 'get_document_image', {'size': PRINT_SIZE, 'quality': QUALITY_PRINT}, 'document_display_print'),
url(r'^(?P<document_id>\d+)/display/$', 'get_document_image', {'size': DISPLAY_SIZE}, 'document_display'),
url(r'^(?P<document_id>\d+)/display/print/$', 'get_document_image', {'size': PRINT_SIZE}, 'document_display_print'),
url(r'^(?P<document_id>\d+)/download/$', 'document_download', (), 'document_download'),
url(r'^(?P<document_id>\d+)/create/siblings/$', 'document_create_siblings', (), 'document_create_siblings'),
@@ -41,9 +33,6 @@ urlpatterns = patterns('documents.views',
url(r'^multiple/clear_transformations/$', 'document_multiple_clear_transformations', (), 'document_multiple_clear_transformations'),
url(r'^duplicates/list/$', 'document_find_all_duplicates', (), 'document_find_all_duplicates'),
url(r'^staging_file/type/(?P<source>\w+)/(?P<staging_file_id>\w+)/preview/$', 'staging_file_preview', (), 'staging_file_preview'),
url(r'^staging_file/type/(?P<source>\w+)/(?P<staging_file_id>\w+)/delete/$', 'staging_file_delete', (), 'staging_file_delete'),
url(r'^page/(?P<document_page_id>\d+)/$', 'document_page_view', (), 'document_page_view'),
url(r'^page/(?P<document_page_id>\d+)/text/$', 'document_page_text', (), 'document_page_text'),
url(r'^page/(?P<document_page_id>\d+)/edit/$', 'document_page_edit', (), 'document_page_edit'),

View File

@@ -1,6 +1,6 @@
import os
from common import TEMPORARY_DIRECTORY
from common.conf.settings import TEMPORARY_DIRECTORY
try:
from python_magic import magic

View File

@@ -1,5 +1,4 @@
import os
import zipfile
import urlparse
import copy
@@ -13,7 +12,6 @@ from django.core.urlresolvers import reverse
from django.views.generic.create_update import delete_object, update_object
from django.conf import settings
from django.utils.http import urlencode
from django.core.files.uploadedfile import SimpleUploadedFile
import sendfile
from common.utils import pretty_size, parse_range, urlquote, \
@@ -22,10 +20,8 @@ from common.widgets import two_state_template
from common.literals import PAGE_SIZE_DIMENSIONS, \
PAGE_ORIENTATION_PORTRAIT, PAGE_ORIENTATION_LANDSCAPE
from common.conf.settings import DEFAULT_PAPER_SIZE
from converter.api import convert_document, QUALITY_DEFAULT
from converter.exceptions import UnkownConvertError, UnknownFormat
from converter.api import DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \
DEFAULT_FILE_FORMAT, QUALITY_PRINT
from converter.literals import DEFAULT_ZOOM_LEVEL, DEFAULT_ROTATION, \
DEFAULT_FILE_FORMAT, DEFAULT_PAGE_NUMBER
from filetransfers.api import serve_file
from grouping.utils import get_document_group_subtemplate
from metadata.api import save_metadata_list, \
@@ -36,10 +32,6 @@ from permissions.api import check_permissions
from document_indexing.api import update_indexes, delete_indexes
from history.api import create_history
from documents.conf.settings import DELETE_STAGING_FILE_AFTER_UPLOAD
from documents.conf.settings import USE_STAGING_DIRECTORY
from documents.conf.settings import PER_USER_STAGING_DIRECTORY
from documents.conf.settings import PREVIEW_SIZE
from documents.conf.settings import THUMBNAIL_SIZE
from documents.conf.settings import STORAGE_BACKEND
@@ -61,7 +53,7 @@ from documents.literals import HISTORY_DOCUMENT_CREATED, \
from documents.forms import DocumentTypeSelectForm, \
DocumentForm, DocumentForm_edit, DocumentPropertiesForm, \
StagingDocumentForm, DocumentPreviewForm, \
DocumentPreviewForm, \
DocumentPageForm, DocumentPageTransformationForm, \
DocumentContentForm, DocumentPageForm_edit, \
DocumentPageForm_text, PrintForm, DocumentTypeForm, \
@@ -69,11 +61,8 @@ from documents.forms import DocumentTypeSelectForm, \
from documents.wizards import DocumentCreateWizard
from documents.models import Document, DocumentType, DocumentPage, \
DocumentPageTransformation, RecentDocument, DocumentTypeFilename
from documents.staging import create_staging_file_class
from documents.literals import PICTURE_ERROR_SMALL, PICTURE_ERROR_MEDIUM, \
PICTURE_UNKNOWN_SMALL, PICTURE_UNKNOWN_MEDIUM
from documents.literals import UPLOAD_SOURCE_LOCAL, \
UPLOAD_SOURCE_STAGING, UPLOAD_SOURCE_USER_STAGING
# Document type permissions
from documents.literals import PERMISSION_DOCUMENT_TYPE_EDIT, \
@@ -116,171 +105,10 @@ def document_create_siblings(request, document_id):
if document.document_type_id:
query_dict['document_type_id'] = document.document_type_id
url = reverse('upload_document_from_local')
url = reverse('upload_interactive')
return HttpResponseRedirect('%s?%s' % (url, urlencode(query_dict)))
def _handle_save_document(request, document, form=None):
    """
    Run the bookkeeping that follows the creation of ``document``.

    The document is added to the requesting user's recent documents, an
    optional filename coming from ``form`` is applied, the metadata encoded
    in the query string is stored, the indexes are updated and a creation
    history entry is recorded.
    """
    RecentDocument.objects.add_document_for_user(request.user, document)

    if form:
        cleaned = form.cleaned_data

        # Filename typed in by the user
        typed_filename = cleaned['new_filename']
        if typed_filename:
            document.file_filename = typed_filename
            document.save()

        # Filename picked from the document type's list; this field is not
        # present on every form, and when set it is applied last.
        if 'document_type_available_filenames' in cleaned:
            preset_filename = cleaned['document_type_available_filenames']
            if preset_filename:
                document.file_filename = preset_filename.filename
                document.save()

    url_metadata = decode_metadata_from_url(request.GET)
    save_metadata_list(url_metadata, document, create=True)

    index_warnings = update_indexes(document)
    # Indexing warnings are only shown to staff and superusers
    if request.user.is_staff or request.user.is_superuser:
        for index_warning in index_warnings:
            messages.warning(request, index_warning)

    create_history(HISTORY_DOCUMENT_CREATED, document, {'user': request.user})
def _handle_zip_file(request, uploaded_file, document_type=None):
filename = getattr(uploaded_file, 'filename', getattr(uploaded_file, 'name', ''))
if filename.lower().endswith('zip'):
zfobj = zipfile.ZipFile(uploaded_file)
for filename in zfobj.namelist():
if not filename.endswith('/'):
zip_document = Document(file=SimpleUploadedFile(
name=filename, content=zfobj.read(filename)))
if document_type:
zip_document.document_type = document_type
zip_document.save()
_handle_save_document(request, zip_document)
messages.success(request, _(u'Extracted file: %s, uploaded successfully.') % filename)
#Signal that uploaded file was a zip file
return True
else:
#Otherwise tell parent to handle file
return False
def upload_document_with_type(request, source):
    """
    Display and process the upload form for a local or a staging file.

    ``source`` selects the form: UPLOAD_SOURCE_LOCAL uses DocumentForm;
    UPLOAD_SOURCE_STAGING and UPLOAD_SOURCE_USER_STAGING use
    StagingDocumentForm, but only while USE_STAGING_DIRECTORY or
    PER_USER_STAGING_DIRECTORY respectively is enabled.  The optional
    ``document_type_id`` query string argument selects the DocumentType
    given to the created documents (looked up with get_object_or_404).

    A valid POST ends in a redirect; a GET or an invalid POST renders
    ``generic_form.html``.
    """
    check_permissions(request.user, [PERMISSION_DOCUMENT_CREATE])

    document_type_id = request.GET.get('document_type_id', None)
    if document_type_id:
        # GET.get() returns the whole value as one string, so it is used as
        # is; indexing it with [0] would keep only its first character and
        # select the wrong type for any id of two or more digits.
        document_type = get_object_or_404(DocumentType, pk=document_type_id)
    else:
        document_type = None

    from_local = source == UPLOAD_SOURCE_LOCAL
    from_staging = not from_local and (
        (USE_STAGING_DIRECTORY and source == UPLOAD_SOURCE_STAGING) or
        (PER_USER_STAGING_DIRECTORY and source == UPLOAD_SOURCE_USER_STAGING))

    if request.method == 'POST':
        if from_local:
            form = DocumentForm(request.POST, request.FILES, document_type=document_type)
            if form.is_valid():
                try:
                    expand = form.cleaned_data['expand']
                    # Store the file as is unless it was expanded as a zip
                    if not expand or not _handle_zip_file(request, request.FILES['file'], document_type):
                        instance = form.save()
                        instance.save()
                        # NOTE(review): the type is assigned after the save
                        # above; confirm it is persisted by a later save.
                        if document_type:
                            instance.document_type = document_type
                        _handle_save_document(request, instance, form)
                        messages.success(request, _(u'Document uploaded successfully.'))
                except Exception as e:
                    messages.error(request, e)

                return HttpResponseRedirect(request.get_full_path())
        elif from_staging:
            StagingFile = create_staging_file_class(request, source)
            form = StagingDocumentForm(request.POST,
                request.FILES, cls=StagingFile,
                document_type=document_type)
            if form.is_valid():
                try:
                    staging_file = StagingFile.get(form.cleaned_data['staging_file_id'])
                    expand = form.cleaned_data['expand']
                    if not expand or not _handle_zip_file(request, staging_file.upload(), document_type):
                        document = Document(file=staging_file.upload())
                        if document_type:
                            document.document_type = document_type
                        document.save()
                        _handle_save_document(request, document, form)
                        messages.success(request, _(u'Staging file: %s, uploaded successfully.') % staging_file.filename)

                    if DELETE_STAGING_FILE_AFTER_UPLOAD:
                        staging_file.delete()
                        messages.success(request, _(u'Staging file: %s, deleted successfully.') % staging_file.filename)
                except Exception as e:
                    messages.error(request, e)

                # The Referer header is optional; come back to this same
                # view instead of failing with a KeyError when it is absent.
                return HttpResponseRedirect(request.META.get('HTTP_REFERER', request.get_full_path()))
    else:
        if from_local:
            form = DocumentForm(document_type=document_type)
        elif from_staging:
            StagingFile = create_staging_file_class(request, source)
            form = StagingDocumentForm(cls=StagingFile,
                document_type=document_type)

    subtemplates_list = []

    if from_local:
        subtemplates_list.append({
            'name': 'generic_form_subtemplate.html',
            'context': {
                'form': form,
                'title': _(u'upload a local document'),
            },
        })
    elif from_staging:
        if source == UPLOAD_SOURCE_STAGING:
            form_title = _(u'upload a document from staging')
            list_title = _(u'files in staging')
        else:
            form_title = _(u'upload a document from user staging')
            list_title = _(u'files in user staging')

        # A failure to list the staging files still renders the form, with
        # an empty file list and the error reported as a message.
        try:
            staging_filelist = StagingFile.get_all()
        except Exception as e:
            messages.error(request, e)
            staging_filelist = []

        subtemplates_list = [
            {
                'name': 'generic_form_subtemplate.html',
                'context': {
                    'form': form,
                    'title': form_title,
                }
            },
            {
                'name': 'generic_list_subtemplate.html',
                'context': {
                    'title': list_title,
                    'object_list': staging_filelist,
                    'hide_link': True,
                }
            },
        ]

    context = {
        'source': source,
        'document_type_id': document_type_id,
        'subtemplates_list': subtemplates_list,
        'sidebar_subtemplates_list': [
            {
                'name': 'generic_subtemplate.html',
                'context': {
                    'title': _(u'Current metadata'),
                    'paragraphs': metadata_repr_as_list(decode_metadata_from_url(request.GET)),
                    'side_bar': True,
                }
            }]
    }
    return render_to_response('generic_form.html', context,
        context_instance=RequestContext(request))
def document_view(request, document_id, advanced=False):
check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW])
#document = get_object_or_404(Document.objects.select_related(), pk=document_id)
@@ -456,38 +284,14 @@ def document_edit(request, document_id):
}, context_instance=RequestContext(request))
def calculate_converter_arguments(document, *args, **kwargs):
    """
    Build the keyword arguments used to convert one page of ``document``.

    Recognized keyword arguments, each with a default: ``size``,
    ``quality``, ``page``, ``file_format``, ``zoom`` and ``rotation``.
    Positional ``args`` are accepted but not used.

    Returns an ``(arguments, warnings)`` tuple where ``warnings`` is the
    second value returned by the page's get_transformation_string().
    """
    page_number = kwargs.get('page', 1)

    # The page's own transformations are passed along as extra options
    target_page = DocumentPage.objects.get(document=document, page_number=page_number)
    extra_options, warnings = target_page.get_transformation_string()

    arguments = {
        'size': kwargs.get('size', PREVIEW_SIZE),
        'file_format': kwargs.get('file_format', DEFAULT_FILE_FORMAT),
        'quality': kwargs.get('quality', QUALITY_DEFAULT),
        'extra_options': extra_options,
        # The lookup above uses the page number as given; the arguments
        # carry it decremented by one.
        'page': page_number - 1,
        'zoom': kwargs.get('zoom', DEFAULT_ZOOM_LEVEL),
        'rotation': kwargs.get('rotation', DEFAULT_ROTATION),
    }
    return arguments, warnings
def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_DEFAULT):
def get_document_image(request, document_id, size=PREVIEW_SIZE):
check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW])
document = get_object_or_404(Document, pk=document_id)
page = int(request.GET.get('page', 1))
page = int(request.GET.get('page', DEFAULT_PAGE_NUMBER))
zoom = int(request.GET.get('zoom', 100))
zoom = int(request.GET.get('zoom', DEFAULT_ZOOM_LEVEL))
if zoom < ZOOM_MIN_LEVEL:
zoom = ZOOM_MIN_LEVEL
@@ -495,37 +299,9 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
if zoom > ZOOM_MAX_LEVEL:
zoom = ZOOM_MAX_LEVEL
rotation = int(request.GET.get('rotation', 0)) % 360
rotation = int(request.GET.get('rotation', DEFAULT_ROTATION)) % 360
arguments, warnings = calculate_converter_arguments(document, size=size, file_format=DEFAULT_FILE_FORMAT, quality=quality, page=page, zoom=zoom, rotation=rotation)
if warnings and (request.user.is_staff or request.user.is_superuser):
for warning in warnings:
messages.warning(request, _(u'Page transformation error: %s') % warning)
try:
output_file = convert_document(document, **arguments)
except UnkownConvertError, e:
if request.user.is_staff or request.user.is_superuser:
messages.error(request, e)
if size == THUMBNAIL_SIZE:
output_file = os.path.join(settings.MEDIA_ROOT, u'images', PICTURE_ERROR_SMALL)
else:
output_file = os.path.join(settings.MEDIA_ROOT, u'images', PICTURE_ERROR_MEDIUM)
except UnknownFormat:
if size == THUMBNAIL_SIZE:
output_file = os.path.join(settings.MEDIA_ROOT, u'images', PICTURE_UNKNOWN_SMALL)
else:
output_file = os.path.join(settings.MEDIA_ROOT, u'images', PICTURE_UNKNOWN_MEDIUM)
except Exception, e:
if request.user.is_staff or request.user.is_superuser:
messages.error(request, e)
if size == THUMBNAIL_SIZE:
output_file = os.path.join(settings.MEDIA_ROOT, u'images', PICTURE_ERROR_SMALL)
else:
output_file = os.path.join(settings.MEDIA_ROOT, u'images', PICTURE_ERROR_MEDIUM)
finally:
return sendfile.sendfile(request, output_file)
return sendfile.sendfile(request, document.get_image(size=size, page=page, zoom=zoom, rotation=rotation))
def document_download(request, document_id):
@@ -546,58 +322,6 @@ def document_download(request, document_id):
return HttpResponseRedirect(request.META['HTTP_REFERER'])
def staging_file_preview(request, source, staging_file_id):
    """
    Serve the preview image of a staging file.

    Requires PERMISSION_DOCUMENT_CREATE.  When the preview cannot be
    produced a placeholder picture from MEDIA_ROOT/images is served
    instead: PICTURE_UNKNOWN_MEDIUM for an UnknownFormat error and
    PICTURE_ERROR_MEDIUM for any other error.  Error details are only
    reported, as messages, to staff and superusers.
    """
    check_permissions(request.user, [PERMISSION_DOCUMENT_CREATE])
    StagingFile = create_staging_file_class(request, source)

    try:
        output_file, errors = StagingFile.get(staging_file_id).preview()
        if errors and (request.user.is_staff or request.user.is_superuser):
            for error in errors:
                messages.warning(request, _(u'Staging file transformation error: %(error)s') % {
                    'error': error
                })
    except UnkownConvertError as e:
        if request.user.is_staff or request.user.is_superuser:
            messages.error(request, e)
        output_file = os.path.join(settings.MEDIA_ROOT, u'images', PICTURE_ERROR_MEDIUM)
    except UnknownFormat:
        output_file = os.path.join(settings.MEDIA_ROOT, u'images', PICTURE_UNKNOWN_MEDIUM)
    except Exception as e:
        if request.user.is_staff or request.user.is_superuser:
            messages.error(request, e)
        output_file = os.path.join(settings.MEDIA_ROOT, u'images', PICTURE_ERROR_MEDIUM)

    # Returned after the try block rather than from a ``finally`` clause: a
    # return inside ``finally`` would discard exceptions not handled above
    # (KeyboardInterrupt, SystemExit) and then fail on the unset output_file.
    return sendfile.sendfile(request, output_file)
def staging_file_delete(request, source, staging_file_id):
    """
    Ask for confirmation and, on POST, delete a staging file.

    Requires PERMISSION_DOCUMENT_CREATE.  The ``next`` and ``previous``
    targets are read from POST, then GET, then the Referer header.  A POST
    deletes the file, reports the outcome as a message and redirects to
    ``next``; any other method renders the confirmation page.
    """
    check_permissions(request.user, [PERMISSION_DOCUMENT_CREATE])

    StagingFile = create_staging_file_class(request, source)
    staging_file = StagingFile.get(staging_file_id)

    referer = request.META.get('HTTP_REFERER', None)
    next_url = request.POST.get('next', request.GET.get('next', referer))
    previous_url = request.POST.get('previous', request.GET.get('previous', referer))

    if request.method != 'POST':
        confirmation_context = {
            'source': source,
            'delete_view': True,
            'object': staging_file,
            'next': next_url,
            'previous': previous_url,
            'form_icon': u'drive_delete.png',
        }
        return render_to_response('generic_confirm.html', confirmation_context,
            context_instance=RequestContext(request))

    try:
        staging_file.delete()
        messages.success(request, _(u'Staging file delete successfully.'))
    except Exception as e:
        # A failed deletion is reported to the user, not raised
        messages.error(request, e)
    return HttpResponseRedirect(next_url)
def document_page_transformation_list(request, document_page_id):
check_permissions(request.user, [PERMISSION_DOCUMENT_TRANSFORM])
@@ -689,10 +413,14 @@ def document_find_duplicates(request, document_id):
check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW])
document = get_object_or_404(Document, pk=document_id)
return _find_duplicate_list(request, [document], include_source=True, confirmation=False)
extra_context = {
'title': _(u'duplicates of: %s') % document,
'object': document,
}
return _find_duplicate_list(request, [document], include_source=True, confirmation=False, extra_context=extra_context)
def _find_duplicate_list(request, source_document_list=Document.objects.all(), include_source=False, confirmation=True):
def _find_duplicate_list(request, source_document_list=Document.objects.all(), include_source=False, confirmation=True, extra_context=None):
previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None)))
if confirmation and request.method != 'POST':
@@ -712,10 +440,18 @@ def _find_duplicate_list(request, source_document_list=Document.objects.all(), i
if include_source and results:
duplicated.append(document.pk)
return render_to_response('generic_list.html', {
context = {
'object_list': Document.objects.filter(pk__in=duplicated),
'title': _(u'duplicated documents'),
}, context_instance=RequestContext(request))
'hide_links': True,
'multi_select_as_buttons': True,
}
if extra_context:
context.update(extra_context)
return render_to_response('generic_list.html', context,
context_instance=RequestContext(request))
def document_find_all_duplicates(request):
@@ -802,13 +538,13 @@ def document_page_view(request, document_page_id):
document_page = get_object_or_404(DocumentPage, pk=document_page_id)
zoom = int(request.GET.get('zoom', 100))
rotation = int(request.GET.get('rotation', 0))
zoom = int(request.GET.get('zoom', DEFAULT_ZOOM_LEVEL))
rotation = int(request.GET.get('rotation', DEFAULT_ROTATION))
document_page_form = DocumentPageForm(instance=document_page, zoom=zoom, rotation=rotation)
base_title = _(u'details for: %s') % document_page
if zoom != 100:
if zoom != DEFAULT_ZOOM_LEVEL:
zoom_text = u'(%d%%)' % zoom
else:
zoom_text = u''
@@ -1036,13 +772,14 @@ def document_print(request, document_id):
def document_hard_copy(request, document_id):
#TODO: FIXME
check_permissions(request.user, [PERMISSION_DOCUMENT_VIEW])
document = get_object_or_404(Document, pk=document_id)
RecentDocument.objects.add_document_for_user(request.user, document)
arguments, warnings = calculate_converter_arguments(document, size=PRINT_SIZE, file_format=DEFAULT_FILE_FORMAT, quality=QUALITY_PRINT)
arguments, warnings = calculate_converter_arguments(document, size=PRINT_SIZE, file_format=DEFAULT_FILE_FORMAT)
# Pre-generate
convert_document(document, **arguments)

View File

@@ -30,7 +30,6 @@ class DocumentCreateWizard(BoundFormWizard):
def __init__(self, *args, **kwargs):
self.query_dict = {}
self.multiple = kwargs.pop('multiple', True)
self.step_titles = kwargs.pop('step_titles', [
_(u'step 1 of 3: Document type'),
_(u'step 2 of 3: Metadata selection'),
@@ -75,13 +74,8 @@ class DocumentCreateWizard(BoundFormWizard):
return 'generic_wizard.html'
def done(self, request, form_list):
if self.multiple:
view = 'upload_document_from_local'
else:
view = 'upload_document'
if self.document_type:
self.query_dict['document_type_id'] = self.document_type.pk
url = urlquote(reverse(view), self.query_dict)
url = urlquote(reverse('upload_interactive'), self.query_dict)
return HttpResponseRedirect(url)

View File

@@ -9,6 +9,7 @@ from converter import formats_list
from documents import document_type_views
from metadata import setup_metadata_type_list, metadata_type_setup_views
from metadata import setup_metadata_set_list, metadata_set_setup_views
from sources import source_list, source_views
from main.conf.settings import SIDE_BAR_SEARCH
from main.conf.settings import DISABLE_HOME_VIEW
@@ -45,18 +46,19 @@ if not SIDE_BAR_SEARCH:
register_top_menu('tools', link=tools_menu, children_views=['statistics', 'history_list', 'formats_list'])
#register_top_menu('setup_menu', link={'text': _(u'setup'), 'view': 'setting_list', 'famfam': 'cog'}, children=setup_views)
register_top_menu('setup_menu', link={'text': _(u'setup'), 'view': 'setting_list', 'famfam': 'cog'}, children_path_regex=[r'^settings/', r'^user_management/', r'^permissions', r'^documents/type', r'^metadata/setup'])
register_top_menu('setup_menu', link={'text': _(u'setup'), 'view': 'setting_list', 'famfam': 'cog'}, children_path_regex=[r'^settings/', r'^user_management/', r'^permissions', r'^documents/type', r'^metadata/setup', r'sources/setup'])
register_top_menu('about', link={'text': _(u'about'), 'view': 'about', 'famfam': 'information'})
register_links(['tools_menu', 'statistics', 'history_list', 'history_view', 'formats_list'], [tools_menu, statistics, history_list, formats_list, sentry], menu_name='secondary_menu')
setup_links = [check_settings, role_list, user_list, group_list, document_types, setup_metadata_type_list, setup_metadata_set_list, admin_site]
setup_links = [check_settings, role_list, user_list, group_list, document_types, setup_metadata_type_list, setup_metadata_set_list, source_list, admin_site]
register_links(['setting_list'], setup_links, menu_name='secondary_menu')
register_links(permission_views, setup_links, menu_name='secondary_menu')
register_links(user_management_views, setup_links, menu_name='secondary_menu')
register_links(document_type_views, setup_links, menu_name='secondary_menu')
register_links(metadata_type_setup_views, setup_links, menu_name='secondary_menu')
register_links(metadata_set_setup_views, setup_links, menu_name='secondary_menu')
register_links(source_views, setup_links, menu_name='secondary_menu')
def get_version():

View File

@@ -5,6 +5,8 @@
{% load settings %}
{% load search_tags %}
{% load main_settings_tags %}
{% load variable_tags %}
{% block web_theme_head %}
{% if new_window_url %}
<script type="text/javascript">
@@ -154,23 +156,47 @@
{% endblock %}
{% block web_theme_secondary_navigation %}
{% get_object_navigation_links "form_header" as form_navigation_links %}
{% if form_navigation_links %}
<div class="secondary-navigation">
<ul class="wat-cf">
{% with form_navigation_links as object_navigation_links %}
{% with "true" as as_li %}
{% with "true" as hide_active_anchor %}
{% with "active" as li_class_active %}
{% with "first" as li_class_first %}
{% include "generic_navigation.html" %}
{% endwith %}
{% endwith %}
{% endwith %}
{% endwith %}
{% endwith %}
</ul>
</div>
{% if navigation_object_list %}
{% for navigation_object_dict in navigation_object_list %}
{% copy_variable navigation_object_dict.object as "navigation_object_name" %}
{% get_object_navigation_links "form_header" as form_navigation_links %}
{% if form_navigation_links %}
<div class="secondary-navigation">
<ul class="wat-cf">
{% with form_navigation_links as object_navigation_links %}
{% with "true" as as_li %}
{% with "true" as hide_active_anchor %}
{% with "active" as li_class_active %}
{% with "first" as li_class_first %}
{% include "generic_navigation.html" %}
{% endwith %}
{% endwith %}
{% endwith %}
{% endwith %}
{% endwith %}
</ul>
</div>
{% endif %}
{% endfor %}
{% else %}
{% get_object_navigation_links "form_header" as form_navigation_links %}
{% if form_navigation_links %}
<div class="secondary-navigation">
<ul class="wat-cf">
{% with form_navigation_links as object_navigation_links %}
{% with "true" as as_li %}
{% with "true" as hide_active_anchor %}
{% with "active" as li_class_active %}
{% with "first" as li_class_first %}
{% include "generic_navigation.html" %}
{% endwith %}
{% endwith %}
{% endwith %}
{% endwith %}
{% endwith %}
</ul>
</div>
{% endif %}
{% endif %}
{% endblock %}
@@ -199,27 +225,51 @@
</ul>
</div>
{% endif %}
{% get_object_navigation_links as object_navigation_links %}
{% if object_navigation_links %}
<div class="block">
{% if object %}
{% if object_name %}
<h3>{% blocktrans %}Actions for {{ object_name }}: {{ object }}{% endblocktrans %}</h3>
{% if navigation_object_list %}
{% for navigation_object_dict in navigation_object_list %}
{% copy_variable navigation_object_dict.object as "navigation_object_name" %}
{% get_object_navigation_links as object_navigation_links %}
{% if object_navigation_links %}
<div class="block">
{% if navigation_object %}
{% if navigation_object_dict.name %}
<h3>{% blocktrans with navigation_object_dict.name as name %}Actions for {{ name }}: {{ navigation_object }}{% endblocktrans %}</h3>
{% else %}
<h3>{% blocktrans %}Actions for: {{ navigation_object }}{% endblocktrans %}</h3>
{% endif %}
{% else %}
<h3>{% trans "Available actions" %}</h3>
{% endif %}
<ul class="navigation">
{% with "true" as as_li %}
{% include "generic_navigation.html" %}
{% endwith %}
</ul>
</div>
{% endif %}
{% endfor %}
{% else %}
{% get_object_navigation_links as object_navigation_links %}
{% if object_navigation_links %}
<div class="block">
{% if navigation_object %}
{% if object_name %}
<h3>{% blocktrans %}Actions for {{ object_name }}: {{ navigation_object }}{% endblocktrans %}</h3>
{% else %}
<h3>{% blocktrans %}Actions for: {{ navigation_object }}{% endblocktrans %}</h3>
{% endif %}
{% else %}
<h3>{% blocktrans %}Actions for: {{ object }}{% endblocktrans %}</h3>
<h3>{% trans "Actions" %}</h3>
{% endif %}
{% else %}
<h3>}{% trans "Actions" %}</h3>
{% endif %}
<ul class="navigation">
{% with "true" as as_li %}
{% include "generic_navigation.html" %}
{% endwith %}
</ul>
</div>
<ul class="navigation">
{% with "true" as as_li %}
{% include "generic_navigation.html" %}
{% endwith %}
</ul>
</div>
{% endif %}
{% endif %}
{% get_object_navigation_links "sidebar" as object_navigation_links %}
{% if object_navigation_links %}
<div class="block">

View File

@@ -52,17 +52,17 @@ setup_metadata_type_edit = {'text': _(u'edit'), 'view': 'setup_metadata_type_edi
setup_metadata_type_delete = {'text': _(u'delete'), 'view': 'setup_metadata_type_delete', 'args': 'object.pk', 'famfam': 'xhtml_delete', 'permissions': [PERMISSION_METADATA_TYPE_DELETE]}
setup_metadata_type_create = {'text': _(u'create new'), 'view': 'setup_metadata_type_create', 'famfam': 'xhtml_add', 'permissions': [PERMISSION_METADATA_TYPE_CREATE]}
setup_metadata_set_list = {'text': _(u'metadata sets'), 'view': 'setup_metadata_set_list', 'famfam': 'application_form', 'permissions': [PERMISSION_METADATA_SET_VIEW]}
setup_metadata_set_edit = {'text': _(u'edit'), 'view': 'setup_metadata_set_edit', 'args': 'object.pk', 'famfam': 'application_form_edit', 'permissions': [PERMISSION_METADATA_SET_EDIT]}
setup_metadata_set_delete = {'text': _(u'delete'), 'view': 'setup_metadata_set_delete', 'args': 'object.pk', 'famfam': 'application_form_delete', 'permissions': [PERMISSION_METADATA_SET_DELETE]}
setup_metadata_set_create = {'text': _(u'create new'), 'view': 'setup_metadata_set_create', 'famfam': 'application_form_add', 'permissions': [PERMISSION_METADATA_SET_CREATE]}
setup_metadata_set_list = {'text': _(u'metadata sets'), 'view': 'setup_metadata_set_list', 'famfam': 'table', 'permissions': [PERMISSION_METADATA_SET_VIEW]}
setup_metadata_set_edit = {'text': _(u'edit'), 'view': 'setup_metadata_set_edit', 'args': 'object.pk', 'famfam': 'table_edit', 'permissions': [PERMISSION_METADATA_SET_EDIT]}
setup_metadata_set_delete = {'text': _(u'delete'), 'view': 'setup_metadata_set_delete', 'args': 'object.pk', 'famfam': 'table_delete', 'permissions': [PERMISSION_METADATA_SET_DELETE]}
setup_metadata_set_create = {'text': _(u'create new'), 'view': 'setup_metadata_set_create', 'famfam': 'table_add', 'permissions': [PERMISSION_METADATA_SET_CREATE]}
setup_document_type_metadata = {'text': _(u'default metadata'), 'view': 'setup_document_type_metadata', 'args': 'object.pk', 'famfam': 'xhtml', 'permissions': [PERMISSION_DOCUMENT_TYPE_EDIT]}
#register_links(Document, [metadata_add, metadata_edit, metadata_remove])
register_links(['metadata_add', 'metadata_edit', 'metadata_remove', 'metadata_view'], [metadata_add, metadata_edit, metadata_remove], menu_name='sidebar')
register_links(Document, [metadata_view], menu_name='form_header')#, metadata_edit, metadata_remove])
register_multi_item_links(['folder_view', 'index_instance_list', 'document_type_document_list', 'search', 'results', 'document_group_view', 'document_list', 'document_list_recent'], [metadata_multiple_add, metadata_multiple_edit, metadata_multiple_remove])
register_multi_item_links(['document_find_duplicates', 'folder_view', 'index_instance_list', 'document_type_document_list', 'search', 'results', 'document_group_view', 'document_list', 'document_list_recent'], [metadata_multiple_add, metadata_multiple_edit, metadata_multiple_remove])
register_links(MetadataType, [setup_metadata_type_edit, setup_metadata_type_delete])
register_links(['setup_metadata_type_delete', 'setup_metadata_type_edit', 'setup_metadata_type_list', 'setup_metadata_type_create'], [setup_metadata_type_create], menu_name='sidebar')

View File

@@ -115,22 +115,54 @@ def resolve_links(context, links, current_view, current_path, parsed_query_strin
new_link['url'] = urlquote(new_link['url'], parsed_query_string)
else:
new_link['active'] = False
if 'conditional_highlight' in link:
new_link['active'] = link['conditional_highlight'](context)
if 'conditional_disable' in link:
new_link['disabled'] = link['conditional_disable'](context)
else:
new_link['disabled'] = False
if current_view in link.get('children_views', []):
new_link['active'] = True
for child_url_regex in link.get('children_url_regex', []):
if re.compile(child_url_regex).match(current_path.lstrip('/')):
new_link['active'] = True
for cls in link.get('children_classes', []):
obj, object_name = get_navigation_object(context)
if type(obj) == cls or obj == cls:
new_link['active'] = True
context_links.append(new_link)
return context_links
def get_navigation_object(context):
    """
    Return the ``(object, name)`` pair navigation links are resolved against.

    The name of the context variable holding the object is read from the
    ``navigation_object_name`` context variable and defaults to
    ``'object'``.  The object is None when the context has no variable of
    that name.
    """
    def lookup(variable_name, fallback):
        # Resolve a template variable, falling back when it is not defined
        try:
            return Variable(variable_name).resolve(context)
        except VariableDoesNotExist:
            return fallback

    object_name = lookup('navigation_object_name', 'object')
    return lookup(object_name, None), object_name
def _get_object_navigation_links(context, menu_name=None, links_dict=object_navigation):
request = Variable('request').resolve(context)
current_path = request.META['PATH_INFO']
current_view = resolve_to_name(current_path)
context_links = []
# Don't fudge with the original global dictionary
links_dict = links_dict.copy()
query_string = urlparse.urlparse(request.get_full_path()).query or urlparse.urlparse(request.META.get('HTTP_REFERER', u'/')).query
parsed_query_string = urlparse.parse_qs(query_string)
@@ -146,14 +178,14 @@ def _get_object_navigation_links(context, menu_name=None, links_dict=object_navi
pass
try:
object_name = Variable('navigation_object_name').resolve(context)
"""
Check for and inject a temporary navigation dictionary
"""
temp_navigation_links = Variable('temporary_navigation_links').resolve(context)
if temp_navigation_links:
links_dict.update(temp_navigation_links)
except VariableDoesNotExist:
object_name = 'object'
try:
obj = Variable(object_name).resolve(context)
except VariableDoesNotExist:
obj = None
pass
try:
links = links_dict[menu_name][current_view]['links']
@@ -162,6 +194,8 @@ def _get_object_navigation_links(context, menu_name=None, links_dict=object_navi
except KeyError:
pass
obj, object_name = get_navigation_object(context)
try:
links = links_dict[menu_name][type(obj)]['links']
for link in resolve_links(context, links, current_view, current_path, parsed_query_string):
@@ -169,6 +203,7 @@ def _get_object_navigation_links(context, menu_name=None, links_dict=object_navi
except KeyError:
pass
return context_links
@@ -192,6 +227,8 @@ class GetNavigationLinks(Node):
def render(self, context):
    """
    Store the resolved navigation links in the template context.

    The links are saved under ``self.var_name`` and the object they were
    resolved against under ``navigation_object``.  Nothing is rendered, so
    an empty string is returned.
    """
    resolved_menu_name = resolve_template_variable(context, self.menu_name)
    navigation_links = _get_object_navigation_links(
        context, resolved_menu_name, links_dict=self.links_dict)
    context[self.var_name] = navigation_links

    # Only the object is published; its variable name is not needed here
    context['navigation_object'] = get_navigation_object(context)[0]
    return ''

View File

@@ -9,7 +9,7 @@ from documents.models import Document
from main.api import register_tool
from ocr.conf.settings import AUTOMATIC_OCR
from ocr.models import DocumentQueue
from ocr.models import DocumentQueue, QueueTransformation
#Permissions
PERMISSION_OCR_DOCUMENT = {'namespace': 'ocr', 'name': 'ocr_document', 'label': _(u'Submit document for OCR')}
@@ -30,20 +30,27 @@ re_queue_multiple_document = {'text': _('re-queue'), 'view': 're_queue_multiple_
queue_document_delete = {'text': _(u'delete'), 'view': 'queue_document_delete', 'args': 'object.id', 'famfam': 'hourglass_delete', 'permissions': [PERMISSION_OCR_DOCUMENT_DELETE]}
queue_document_multiple_delete = {'text': _(u'delete'), 'view': 'queue_document_multiple_delete', 'famfam': 'hourglass_delete', 'permissions': [PERMISSION_OCR_DOCUMENT_DELETE]}
document_queue_disable = {'text': _(u'stop queue'), 'view': 'document_queue_disable', 'args': 'object.id', 'famfam': 'control_stop_blue', 'permissions': [PERMISSION_OCR_QUEUE_ENABLE_DISABLE]}
document_queue_enable = {'text': _(u'activate queue'), 'view': 'document_queue_enable', 'args': 'object.id', 'famfam': 'control_play_blue', 'permissions': [PERMISSION_OCR_QUEUE_ENABLE_DISABLE]}
document_queue_disable = {'text': _(u'stop queue'), 'view': 'document_queue_disable', 'args': 'queue.id', 'famfam': 'control_stop_blue', 'permissions': [PERMISSION_OCR_QUEUE_ENABLE_DISABLE]}
document_queue_enable = {'text': _(u'activate queue'), 'view': 'document_queue_enable', 'args': 'queue.id', 'famfam': 'control_play_blue', 'permissions': [PERMISSION_OCR_QUEUE_ENABLE_DISABLE]}
all_document_ocr_cleanup = {'text': _(u'clean up pages content'), 'view': 'all_document_ocr_cleanup', 'famfam': 'text_strikethrough', 'permissions': [PERMISSION_OCR_CLEAN_ALL_PAGES], 'description': _(u'Runs a language filter to remove common OCR mistakes from document pages content.')}
queue_document_list = {'text': _(u'queue document list'), 'view': 'queue_document_list', 'famfam': 'hourglass', 'permissions': [PERMISSION_OCR_DOCUMENT]}
node_active_list = {'text': _(u'active tasks'), 'view': 'node_active_list', 'famfam': 'server_chart', 'permissions': [PERMISSION_OCR_DOCUMENT]}
setup_queue_transformation_list = {'text': _(u'transformations'), 'view': 'setup_queue_transformation_list', 'args': 'queue.pk', 'famfam': 'shape_move_front'}
setup_queue_transformation_create = {'text': _(u'add transformation'), 'view': 'setup_queue_transformation_create', 'args': 'queue.pk', 'famfam': 'shape_square_add'}
setup_queue_transformation_edit = {'text': _(u'edit'), 'view': 'setup_queue_transformation_edit', 'args': 'transformation.pk', 'famfam': 'shape_square_edit'}
setup_queue_transformation_delete = {'text': _(u'delete'), 'view': 'setup_queue_transformation_delete', 'args': 'transformation.pk', 'famfam': 'shape_square_delete'}
register_links(Document, [submit_document])
register_links(DocumentQueue, [document_queue_disable, document_queue_enable])
register_links(DocumentQueue, [document_queue_disable, document_queue_enable, setup_queue_transformation_list])
register_links(QueueTransformation, [setup_queue_transformation_edit, setup_queue_transformation_delete])
register_multi_item_links(['queue_document_list'], [re_queue_multiple_document, queue_document_multiple_delete])
register_links(['queue_document_list', 'node_active_list'], [queue_document_list, node_active_list], menu_name='secondary_menu')
register_links(['setup_queue_transformation_create', 'setup_queue_transformation_edit', 'setup_queue_transformation_delete', 'document_queue_disable', 'document_queue_enable', 'queue_document_list', 'node_active_list', 'setup_queue_transformation_list'], [queue_document_list, node_active_list], menu_name='secondary_menu')
register_links(['setup_queue_transformation_edit', 'setup_queue_transformation_delete', 'setup_queue_transformation_list', 'setup_queue_transformation_create'], [setup_queue_transformation_create], menu_name='sidebar')
register_tool(all_document_ocr_cleanup, namespace='ocr', title=_(u'OCR'))

View File

@@ -9,13 +9,18 @@ import sys
from django.utils.translation import ugettext as _
from django.utils.importlib import import_module
from converter.api import convert_document_for_ocr
from common.conf.settings import TEMPORARY_DIRECTORY
from converter.api import convert
from documents.models import DocumentPage
from ocr.conf.settings import TESSERACT_PATH
from ocr.conf.settings import TESSERACT_LANGUAGE
from ocr.conf.settings import PDFTOTEXT_PATH
from ocr.exceptions import TesseractError, PdftotextError
from ocr.exceptions import TesseractError, UnpaperError
from ocr.conf.settings import UNPAPER_PATH
from ocr.parsers import parse_document_page
from ocr.parsers.exceptions import ParserError, ParserUnknownFile
from ocr.literals import DEFAULT_OCR_FILE_FORMAT, UNPAPER_FILE_FORMAT, \
DEFAULT_OCR_FILE_EXTENSION
def get_language_backend():
@@ -30,7 +35,7 @@ def get_language_backend():
return None
return module
backend = get_language_backend()
language_backend = get_language_backend()
def cleanup(filename):
@@ -43,78 +48,81 @@ def cleanup(filename):
pass
def run_tesseract(input_filename, output_filename_base, lang=None):
def run_tesseract(input_filename, lang=None):
"""
Execute the command line binary of tesseract
"""
command = [unicode(TESSERACT_PATH), unicode(input_filename), unicode(output_filename_base)]
if lang is not None:
command += [u'-l', lang]
fd, filepath = tempfile.mkstemp()
os.close(fd)
ocr_output = os.extsep.join([filepath, u'txt'])
command = [unicode(TESSERACT_PATH), unicode(input_filename), unicode(filepath)]
# TODO: Tesseract 3.0 segfaults
#if lang is not None:
# command.extend([u'-l', lang])
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return_code = proc.wait()
if return_code != 0:
error_text = proc.stderr.read()
cleanup(filepath)
cleanup(ocr_output)
raise TesseractError(error_text)
fd = codecs.open(ocr_output, 'r', 'utf-8')
text = fd.read().strip()
fd.close()
os.unlink(filepath)
return text
def run_pdftotext(input_filename, output_filename, page_number=None):
def do_document_ocr(queue_document):
"""
Execute the command line binary of pdftotext
Try first to extract text from document pages using the registered
parser, if the parser fails or if there is no parser registered for
the document mimetype do a visual OCR by calling tesseract
"""
command = [unicode(PDFTOTEXT_PATH)]
if page_number:
command.extend([u'-nopgbrk', u'-f', unicode(page_number), u'-l', unicode(page_number)])
command.extend([unicode(input_filename), unicode(output_filename)])
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
return_code = proc.wait()
if return_code != 0:
error_text = proc.stderr.read()
raise PdftotextError(error_text)
def do_document_ocr(document):
"""
Do OCR on all the pages of the given document object, first
trying to extract text from PDF using pdftotext then by calling
tesseract
"""
for page_index, document_page in enumerate(document.documentpage_set.all()):
desc, filepath = tempfile.mkstemp()
imagefile = None
source = u''
for document_page in queue_document.document.documentpage_set.all():
try:
if document.file_mimetype == u'application/pdf':
pdf_filename = os.extsep.join([filepath, u'pdf'])
document.save_to_file(pdf_filename)
run_pdftotext(pdf_filename, filepath, document_page.page_number)
cleanup(pdf_filename)
if os.stat(filepath).st_size == 0:
#PDF page had no text, run tesseract on the page
imagefile = convert_document_for_ocr(document, page=page_index)
run_tesseract(imagefile, filepath, TESSERACT_LANGUAGE)
ocr_output = os.extsep.join([filepath, u'txt'])
source = _(u'Text from OCR')
else:
ocr_output = filepath
source = _(u'Text extracted from PDF')
else:
imagefile = convert_document_for_ocr(document, page=page_index)
run_tesseract(imagefile, filepath, TESSERACT_LANGUAGE)
ocr_output = os.extsep.join([filepath, u'txt'])
source = _(u'Text from OCR')
f = codecs.open(ocr_output, 'r', 'utf-8')
document_page = document.documentpage_set.get(page_number=page_index + 1)
document_page.content = ocr_cleanup(f.read().strip())
document_page.page_label = source
document_page.save()
f.close()
cleanup(ocr_output)
finally:
os.close(desc)
cleanup(filepath)
if imagefile:
cleanup(imagefile)
# Try to extract text by means of a parser
parse_document_page(document_page)
except (ParserError, ParserUnknownFile):
# Fall back to doing visual OCR
##ocr_transformations, warnings = queue_document.get_transformation_list()
document_filepath = document_page.document.get_image_cache_name(page=document_page.page_number)
unpaper_output_filename = u'%s_unpaper_out_page_%s%s%s' % (document_page.document.uuid, document_page.page_number, os.extsep, UNPAPER_FILE_FORMAT)
unpaper_output_filepath = os.path.join(TEMPORARY_DIRECTORY, unpaper_output_filename)
unpaper_input=convert(document_filepath, file_format=UNPAPER_FILE_FORMAT)
execute_unpaper(input_filepath=unpaper_input, output_filepath=unpaper_output_filepath)
#from PIL import Image, ImageOps
#im = Image.open(document_filepath)
##if im.mode=='RGBA':
## im=im.convert('RGB')
##im = im.convert('L')
#im = ImageOps.grayscale(im)
#im.save(unpaper_output_filepath)
# Convert to TIFF
pre_ocr_filepath = output_filepath=convert(input_filepath=unpaper_output_filepath, file_format=DEFAULT_OCR_FILE_FORMAT)
# Tesseract needs an explicit file extension
pre_ocr_filepath_w_ext = os.extsep.join([pre_ocr_filepath, DEFAULT_OCR_FILE_EXTENSION])
os.rename(pre_ocr_filepath, pre_ocr_filepath_w_ext)
try:
ocr_text = run_tesseract(pre_ocr_filepath_w_ext, TESSERACT_LANGUAGE)
document_page.content = ocr_cleanup(ocr_text)
document_page.page_label = _(u'Text from OCR')
document_page.save()
finally:
cleanup(pre_ocr_filepath_w_ext)
cleanup(unpaper_input)
cleanup(document_filepath)
cleanup(unpaper_output_filepath)
def ocr_cleanup(text):
@@ -127,8 +135,8 @@ def ocr_cleanup(text):
for line in text.splitlines():
line = line.strip()
for word in line.split():
if backend:
result = backend.check_word(word)
if language_backend:
result = language_backend.check_word(word)
else:
result = word
if result:
@@ -147,3 +155,19 @@ def clean_pages():
if page.content:
page.content = ocr_cleanup(page.content)
page.save()
def execute_unpaper(input_filepath, output_filepath):
    """
    Execute the program unpaper on a single image file using
    subprocess's Popen

    input_filepath: path of the image file handed to unpaper
    output_filepath: path of the file unpaper is asked to write; the
    --overwrite switch is always passed

    Raises UnpaperError, carrying everything unpaper wrote to stderr,
    when the program exits with a non zero return code
    """
    command = [
        UNPAPER_PATH,
        u'--overwrite',
        u'--no-multi-pages',
        input_filepath,
        output_filepath,
    ]
    proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE)
    # communicate() drains stderr while waiting for the process; calling
    # wait() alone can block forever once the child fills the pipe buffer
    error_text = proc.communicate()[1]
    if proc.returncode != 0:
        raise UnpaperError(error_text)

View File

@@ -13,8 +13,8 @@ register_settings(
{'name': u'REPLICATION_DELAY', 'global_name': u'OCR_REPLICATION_DELAY', 'default': 10, 'description': _(u'Amount of seconds to delay OCR of documents to allow for the node\'s storage replication overhead.')},
{'name': u'NODE_CONCURRENT_EXECUTION', 'global_name': u'OCR_NODE_CONCURRENT_EXECUTION', 'default': 1, 'description': _(u'Maximum amount of concurrent document OCRs a node can perform.')},
{'name': u'AUTOMATIC_OCR', 'global_name': u'OCR_AUTOMATIC_OCR', 'default': False, 'description': _(u'Automatically queue newly created documents for OCR.')},
{'name': u'PDFTOTEXT_PATH', 'global_name': u'OCR_PDFTOTEXT_PATH', 'default': u'/usr/bin/pdftotext', 'exists': True},
{'name': u'QUEUE_PROCESSING_INTERVAL', 'global_name': u'OCR_QUEUE_PROCESSING_INTERVAL', 'default': 10},
{'name': u'CACHE_URI', 'global_name': u'OCR_CACHE_URI', 'default': None, 'description': _(u'URI in the form: "memcached://127.0.0.1:11211/" to specify a cache backend to use for locking. Multiple hosts can be specified separated by a semicolon.')}
{'name': u'CACHE_URI', 'global_name': u'OCR_CACHE_URI', 'default': None, 'description': _(u'URI in the form: "memcached://127.0.0.1:11211/" to specify a cache backend to use for locking. Multiple hosts can be specified separated by a semicolon.')},
{'name': u'UNPAPER_PATH', 'global_name': u'OCR_UNPAPER_PATH', 'default': u'/usr/bin/unpaper', 'description': _(u'File path to unpaper program.'), 'exists': True},
]
)

View File

@@ -6,5 +6,8 @@ class TesseractError(Exception):
pass
class PdftotextError(Exception):
class UnpaperError(Exception):
    """
    Raised when the unpaper program exits with an error
    """

21
apps/ocr/forms.py Normal file
View File

@@ -0,0 +1,21 @@
from django import forms
from django.utils.translation import ugettext_lazy as _
from django.utils.translation import ugettext
from ocr.models import QueueTransformation
class QueueTransformationForm(forms.ModelForm):
    """
    Form used to edit an existing queue transformation; the two generic
    relation fields stay in the form but are rendered as hidden inputs
    """
    class Meta:
        model = QueueTransformation

    def __init__(self, *args, **kwargs):
        super(QueueTransformationForm, self).__init__(*args, **kwargs)
        for hidden_field_name in ('content_type', 'object_id'):
            self.fields[hidden_field_name].widget = forms.HiddenInput()
class QueueTransformationForm_create(forms.ModelForm):
    """
    Form used to create a queue transformation; the generic relation
    fields are left out of the form altogether
    """
    class Meta:
        model = QueueTransformation
        exclude = (
            'content_type',
            'object_id',
        )

View File

@@ -19,3 +19,7 @@ QUEUEDOCUMENT_STATE_CHOICES = (
(QUEUEDOCUMENT_STATE_PROCESSING, _(u'processing')),
(QUEUEDOCUMENT_STATE_ERROR, _(u'error')),
)
# File format requested from the converter for the image handed to tesseract
DEFAULT_OCR_FILE_FORMAT = u'tiff'
# Extension added to that image's filename, tesseract needs an explicit one
DEFAULT_OCR_FILE_EXTENSION = u'tif'
# File format of the files going in and out of unpaper
UNPAPER_FILE_FORMAT = u'ppm'

View File

@@ -1,18 +0,0 @@
from django.db import models
from ocr.exceptions import AlreadyQueued
class DocumentQueueManager(models.Manager):
"""
Module manager class to handle adding documents to an OCR document
queue
"""
def queue_document(self, document, queue_name='default'):
document_queue = self.model.objects.get(name=queue_name)
if document_queue.queuedocument_set.filter(document=document):
raise AlreadyQueued
document_queue.queuedocument_set.create(document=document, delay=True)
return document_queue

41
apps/ocr/managers.py Normal file
View File

@@ -0,0 +1,41 @@
from django.db import models
from django.contrib.contenttypes.models import ContentType
from ocr.exceptions import AlreadyQueued
class DocumentQueueManager(models.Manager):
    """
    Module manager class to handle adding documents to an OCR document
    queue
    """
    def queue_document(self, document, queue_name='default'):
        """
        Add document to the document queue named queue_name and return
        that document queue

        Raises AlreadyQueued when the document is already present in
        the queue.  The exception raised by the queue lookup is not
        handled here, so asking for a queue name that does not exist
        propagates it to the caller
        """
        document_queue = self.model.objects.get(name=queue_name)
        # exists() asks the database a yes/no question instead of
        # loading every matching row just to test for emptiness
        if document_queue.queuedocument_set.filter(document=document).exists():
            raise AlreadyQueued

        document_queue.queuedocument_set.create(document=document, delay=True)
        return document_queue
class QueueTransformationManager(models.Manager):
    """
    Manager to look up the transformations attached to an object by
    means of the content_type / object_id pair
    """
    def get_for_object(self, obj):
        """
        Return the queryset of transformations attached to obj
        """
        content_type = ContentType.objects.get_for_model(obj)
        queryset = self.model.objects.filter(content_type=content_type)
        return queryset.filter(object_id=obj.pk)

    def get_for_object_as_list(self, obj):
        """
        Return a (transformations, warnings) tuple for obj

        transformations is a list of dictionaries holding the name of
        each transformation and its evaluated arguments; a row whose
        arguments fail to evaluate is left out of the list and the
        exception is appended to warnings instead
        """
        transformations = []
        warnings = []
        rows = self.get_for_object(obj).values('transformation', 'arguments')
        for row in rows:
            try:
                # NOTE(review): eval() runs the stored text as Python
                # code, passing an empty globals dictionary does not
                # sandbox it; ast.literal_eval is the candidate
                # replacement if arguments are always plain literals
                arguments = eval(row['arguments'], {})
            except Exception as exception:
                warnings.append(exception)
            else:
                transformations.append(
                    {
                        'transformation': row['transformation'],
                        'arguments': arguments,
                    }
                )

        return transformations, warnings

View File

@@ -2,13 +2,16 @@ from django.db import models
from django.utils.translation import ugettext_lazy as _
from django.utils.translation import ugettext
from django.core.exceptions import ObjectDoesNotExist
from django.contrib.contenttypes.models import ContentType
from django.contrib.contenttypes import generic
from documents.models import Document
from converter.api import get_available_transformations_choices
from ocr.literals import DOCUMENTQUEUE_STATE_STOPPED, \
DOCUMENTQUEUE_STATE_CHOICES, QUEUEDOCUMENT_STATE_PENDING, \
QUEUEDOCUMENT_STATE_CHOICES
from ocr.manager import DocumentQueueManager
from ocr.managers import DocumentQueueManager, QueueTransformationManager
class DocumentQueue(models.Model):
@@ -45,9 +48,35 @@ class QueueDocument(models.Model):
ordering = ('datetime_submitted',)
verbose_name = _(u'queue document')
verbose_name_plural = _(u'queue documents')
def get_transformation_list(self):
    """
    Return the (transformations, warnings) tuple that the
    QueueTransformation manager builds for this instance
    """
    # NOTE(review): the lookup is keyed on this queue document itself,
    # while the OCR setup views attach transformations to a
    # DocumentQueue - confirm which object is intended here
    transformation_manager = QueueTransformation.objects
    return transformation_manager.get_for_object_as_list(self)
def __unicode__(self):
    """
    Return the text representation of the related document, or a
    translated placeholder when looking it up raises
    ObjectDoesNotExist
    """
    try:
        label = unicode(self.document)
    except ObjectDoesNotExist:
        label = ugettext(u'Missing document.')
    return label
class QueueTransformation(models.Model):
"""
Model that stores the transformation and transformation arguments
for a given document queue
"""
# Generic relation to the object the transformation is attached to
content_type = models.ForeignKey(ContentType)
object_id = models.PositiveIntegerField()
content_object = generic.GenericForeignKey('content_type', 'object_id')
# Position of the transformation, used by Meta.ordering below
order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_(u'order'), db_index=True)
# One of the choices reported by the converter app
transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_(u'transformation'))
# Kept as text; QueueTransformationManager passes this text to eval()
arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: %s') % u'{\'degrees\':90}')
objects = QueueTransformationManager()
# Shown as the display label of the selected transformation choice
def __unicode__(self):
return self.get_transformation_display()
class Meta:
ordering = ('order',)
verbose_name = _(u'document queue transformation')
verbose_name_plural = _(u'document queue transformations')

View File

@@ -0,0 +1,40 @@
import codecs
import os
import subprocess
import tempfile
import sys
import slate
from django.utils.translation import ugettext as _
from ocr.parsers.exceptions import ParserError, ParserUnknownFile
mimetype_registry = {}
def register_parser(mimetype, function):
    """
    Register function as the text parser for the given mimetype,
    replacing any parser registered for that mimetype before
    """
    mimetype_registry.update({mimetype: {'function': function}})
def pdf_parser(document_page):
    """
    Extract the text of a PDF document page by means of slate and save
    it as the content of the page

    document_page: the page to fill in; its page_number is shifted by
    one to index the list of pages returned by slate

    Raises ParserError when the text found for the page is a lone form
    feed character, which is treated as no text having been extracted
    """
    fd = document_page.document.open()
    try:
        pdf_pages = slate.PDF(fd)
    finally:
        # Release the file even when slate fails to parse it
        fd.close()

    page_text = pdf_pages[document_page.page_number - 1]
    if page_text == '\x0c':
        raise ParserError

    document_page.content = page_text
    document_page.page_label = _(u'Text extracted from PDF')
    document_page.save()
def parse_document_page(document_page):
    """
    Hand document_page to the parser registered for the mimetype of
    its document

    Raises ParserUnknownFile when there is no parser registered for
    the mimetype
    """
    try:
        registry_entry = mimetype_registry[document_page.document.file_mimetype]
        parser_function = registry_entry['function']
        # NOTE(review): the parser runs inside the try block, so a
        # KeyError escaping from the parser itself is also reported as
        # ParserUnknownFile
        parser_function(document_page)
    except KeyError:
        raise ParserUnknownFile
register_parser('application/pdf', pdf_parser)

View File

@@ -0,0 +1,10 @@
class ParserError(Exception):
    """
    Raised when a text parser fails to understand the file it has been
    passed or when the resulting parsed text is invalid
    """
class ParserUnknownFile(Exception):
    """
    Raised when there is no parser registered for the mimetype of a
    document
    """

View File

@@ -56,7 +56,7 @@ def task_process_queue_document(queue_document_id):
queue_document.result = task_process_queue_document.request.id
queue_document.save()
try:
do_document_ocr(queue_document.document)
do_document_ocr(queue_document)
queue_document.delete()
except Exception, e:
queue_document.state = QUEUEDOCUMENT_STATE_ERROR

View File

@@ -1,16 +1,22 @@
from django.conf.urls.defaults import patterns, url
urlpatterns = patterns('ocr.views',
url(r'^(?P<document_id>\d+)/submit/$', 'submit_document', (), 'submit_document'),
url(r'^ocr/queue/document/list/$', 'queue_document_list', (), 'queue_document_list'),
url(r'^ocr/queue/document/(?P<queue_document_id>\d+)/delete/$', 'queue_document_delete', (), 'queue_document_delete'),
url(r'^ocr/queue/document/multiple/delete/$', 'queue_document_multiple_delete', (), 'queue_document_multiple_delete'),
url(r'^ocr/queue/document/(?P<queue_document_id>\d+)/re-queue/$', 're_queue_document', (), 're_queue_document'),
url(r'^ocr/queue/document/multiple/re-queue/$', 're_queue_multiple_document', (), 're_queue_multiple_document'),
url(r'^document/(?P<document_id>\d+)/submit/$', 'submit_document', (), 'submit_document'),
url(r'^queue/document/list/$', 'queue_document_list', (), 'queue_document_list'),
url(r'^queue/document/(?P<queue_document_id>\d+)/delete/$', 'queue_document_delete', (), 'queue_document_delete'),
url(r'^queue/document/multiple/delete/$', 'queue_document_multiple_delete', (), 'queue_document_multiple_delete'),
url(r'^queue/document/(?P<queue_document_id>\d+)/re-queue/$', 're_queue_document', (), 're_queue_document'),
url(r'^queue/document/multiple/re-queue/$', 're_queue_multiple_document', (), 're_queue_multiple_document'),
url(r'^ocr/queue/(?P<document_queue_id>\d+)/enable/$', 'document_queue_enable', (), 'document_queue_enable'),
url(r'^ocr/queue/(?P<document_queue_id>\d+)/disable/$', 'document_queue_disable', (), 'document_queue_disable'),
url(r'^queue/(?P<document_queue_id>\d+)/enable/$', 'document_queue_enable', (), 'document_queue_enable'),
url(r'^queue/(?P<document_queue_id>\d+)/disable/$', 'document_queue_disable', (), 'document_queue_disable'),
url(r'^ocr/document/all/clean_up/$', 'all_document_ocr_cleanup', (), 'all_document_ocr_cleanup'),
url(r'^ocr/node/active/list/$', 'node_active_list', (), 'node_active_list'),
url(r'^document/all/clean_up/$', 'all_document_ocr_cleanup', (), 'all_document_ocr_cleanup'),
url(r'^node/active/list/$', 'node_active_list', (), 'node_active_list'),
url(r'^queue/(?P<document_queue_id>\d+)/transformation/list/$', 'setup_queue_transformation_list', (), 'setup_queue_transformation_list'),
url(r'^queue/(?P<document_queue_id>\w+)/transformation/create/$', 'setup_queue_transformation_create', (), 'setup_queue_transformation_create'),
url(r'^queue/transformation/(?P<transformation_id>\w+)/edit/$', 'setup_queue_transformation_edit', (), 'setup_queue_transformation_edit'),
url(r'^queue/transformation/(?P<transformation_id>\w+)/delete/$', 'setup_queue_transformation_delete', (), 'setup_queue_transformation_delete'),
)

View File

@@ -6,9 +6,8 @@ from django.shortcuts import render_to_response, get_object_or_404
from django.template import RequestContext
from django.contrib import messages
from django.views.generic.list_detail import object_list
from django.core.urlresolvers import reverse
from django.utils.translation import ugettext_lazy as _
from django.conf import settings
from django.core.urlresolvers import reverse
from celery.task.control import inspect
from permissions.api import check_permissions
@@ -18,12 +17,13 @@ from documents.widgets import document_link, document_thumbnail
from ocr import PERMISSION_OCR_DOCUMENT, PERMISSION_OCR_DOCUMENT_DELETE, \
PERMISSION_OCR_QUEUE_ENABLE_DISABLE, PERMISSION_OCR_CLEAN_ALL_PAGES
from ocr.models import DocumentQueue, QueueDocument
from ocr.models import DocumentQueue, QueueDocument, QueueTransformation
from ocr.literals import QUEUEDOCUMENT_STATE_PENDING, \
QUEUEDOCUMENT_STATE_PROCESSING, DOCUMENTQUEUE_STATE_STOPPED, \
DOCUMENTQUEUE_STATE_ACTIVE
from ocr.exceptions import AlreadyQueued
from ocr.api import clean_pages
from ocr.forms import QueueTransformationForm, QueueTransformationForm_create
def queue_document_list(request, queue_name='default'):
@@ -38,8 +38,10 @@ def queue_document_list(request, queue_name='default'):
extra_context={
'title': _(u'documents in queue: %s') % document_queue,
'hide_object': True,
'object': document_queue,
'queue': document_queue,
'object_name': _(u'document queue'),
'navigation_object_name': 'queue',
'list_object_variable_name': 'queue_document',
'extra_columns': [
{'name': 'document', 'attribute': lambda x: document_link(x.document) if hasattr(x, 'document') else _(u'Missing document.')},
{'name': _(u'thumbnail'), 'attribute': lambda x: document_thumbnail(x.document)},
@@ -212,7 +214,8 @@ def document_queue_disable(request, document_queue_id):
return HttpResponseRedirect(next)
return render_to_response('generic_confirm.html', {
'object': document_queue,
'queue': document_queue,
'navigation_object_name': 'queue',
'title': _(u'Are you sure you wish to disable document queue: %s') % document_queue,
'next': next,
'previous': previous,
@@ -238,7 +241,8 @@ def document_queue_enable(request, document_queue_id):
return HttpResponseRedirect(next)
return render_to_response('generic_confirm.html', {
'object': document_queue,
'queue': document_queue,
'navigation_object_name': 'queue',
'title': _(u'Are you sure you wish to activate document queue: %s') % document_queue,
'next': next,
'previous': previous,
@@ -317,3 +321,141 @@ def node_active_list(request):
{'name': _(u'related object'), 'attribute': lambda x: display_link(x['related_object']) if x['related_object'] else u''}
],
}, context_instance=RequestContext(request))
def setup_queue_transformation_list(request, document_queue_id):
    """
    List the transformations attached to the document queue with the
    given id; get_object_or_404 is used for the queue lookup
    """
    # TODO: no permission check is enforced; the disabled placeholder
    # that stood here referenced PERMISSION_SOURCES_SETUP_EDIT
    queue = get_object_or_404(DocumentQueue, pk=document_queue_id)

    columns = [
        {'name': _(u'order'), 'attribute': 'order'},
        {
            'name': _(u'transformation'),
            'attribute': lambda transformation: transformation.get_transformation_display()
        },
        {'name': _(u'arguments'), 'attribute': 'arguments'},
    ]

    return render_to_response(
        'generic_list.html',
        {
            'object_list': QueueTransformation.objects.get_for_object(queue),
            'title': _(u'transformations for: %s') % queue,
            'queue': queue,
            'object_name': _(u'document queue'),
            'navigation_object_name': 'queue',
            'list_object_variable_name': 'transformation',
            'extra_columns': columns,
            'hide_link': True,
            'hide_object': True,
        },
        context_instance=RequestContext(request)
    )
def setup_queue_transformation_edit(request, transformation_id):
    """
    Edit an existing queue transformation

    On a valid POST the arguments text is test evaluated, the form is
    saved and the browser is redirected to the 'next' URL; in every
    other case the edit form is rendered
    """
    # TODO: no permission check is enforced; the disabled placeholder
    # that stood here referenced PERMISSION_SOURCES_SETUP_EDIT
    transformation = get_object_or_404(QueueTransformation, pk=transformation_id)
    redirect_view = reverse('setup_queue_transformation_list', args=[transformation.content_object.pk])
    # 'next' is taken from POST, then GET, then the referer and finally
    # falls back to the transformation list of the queue
    referer = request.META.get('HTTP_REFERER', redirect_view)
    next_url = request.POST.get('next', request.GET.get('next', referer))

    if request.method != 'POST':
        form = QueueTransformationForm(instance=transformation)
    else:
        form = QueueTransformationForm(instance=transformation, data=request.POST)
        if form.is_valid():
            try:
                # Test the validity of the argument field
                # NOTE(review): eval() executes text submitted by the
                # user as Python code; the empty globals dictionary
                # does not sandbox it
                eval(form.cleaned_data['arguments'], {})
            except:
                # The bare except is kept on purpose, it also traps
                # what does not derive from Exception
                messages.error(request, _(u'Queue transformation argument error.'))
            else:
                try:
                    form.save()
                    messages.success(request, _(u'Queue transformation edited successfully'))
                    return HttpResponseRedirect(next_url)
                except Exception as exception:
                    messages.error(request, _(u'Error editing queue transformation; %s') % exception)

    template_context = {
        'title': _(u'Edit transformation: %s') % transformation,
        'form': form,
        'queue': transformation.content_object,
        'transformation': transformation,
        'navigation_object_list': [
            {'object': 'queue', 'name': _(u'document queue')},
            {'object': 'transformation', 'name': _(u'transformation')}
        ],
        'next': next_url,
    }
    return render_to_response(
        'generic_form.html', template_context,
        context_instance=RequestContext(request)
    )
def setup_queue_transformation_delete(request, transformation_id):
    """
    Delete a queue transformation

    A POST deletes the transformation and always redirects to the
    transformation list of its queue, reporting the outcome through
    the messages framework; any other method renders the confirmation
    page
    """
    # TODO: no permission check is enforced; the disabled placeholder
    # that stood here referenced PERMISSION_SOURCES_SETUP_EDIT
    transformation = get_object_or_404(QueueTransformation, pk=transformation_id)
    redirect_view = reverse('setup_queue_transformation_list', args=[transformation.content_object.pk])
    referer = request.META.get('HTTP_REFERER', redirect_view)
    previous = request.POST.get('previous', request.GET.get('previous', referer))

    if request.method == 'POST':
        try:
            transformation.delete()
            messages.success(request, _(u'Queue transformation deleted successfully.'))
        except Exception as exception:
            messages.error(
                request,
                _(u'Error deleting queue transformation; %(error)s') % {'error': exception}
            )
        return HttpResponseRedirect(redirect_view)

    confirmation_title = _(u'Are you sure you wish to delete queue transformation "%(transformation)s"') % {
        'transformation': transformation.get_transformation_display(),
    }
    template_context = {
        'delete_view': True,
        'transformation': transformation,
        'queue': transformation.content_object,
        'navigation_object_list': [
            {'object': 'queue', 'name': _(u'document queue')},
            {'object': 'transformation', 'name': _(u'transformation')}
        ],
        'title': confirmation_title,
        'previous': previous,
        'form_icon': u'shape_square_delete.png',
    }
    return render_to_response(
        'generic_confirm.html', template_context,
        context_instance=RequestContext(request)
    )
def setup_queue_transformation_create(request, document_queue_id):
    """
    Create a new transformation for the document queue with the given
    id

    On a valid POST the arguments text is test evaluated, the new
    transformation is attached to the queue and the browser is
    redirected to the transformation list of the queue; in every other
    case the creation form is rendered
    """
    # TODO: no permission check is enforced; the disabled placeholder
    # that stood here referenced PERMISSION_SOURCES_SETUP_EDIT
    document_queue = get_object_or_404(DocumentQueue, pk=document_queue_id)
    redirect_view = reverse('setup_queue_transformation_list', args=[document_queue.pk])

    if request.method == 'POST':
        form = QueueTransformationForm_create(request.POST)
        if form.is_valid():
            try:
                # Test the validity of the argument field
                # NOTE(review): eval() executes text submitted by the
                # user as Python code; the empty globals dictionary
                # does not sandbox it
                eval(form.cleaned_data['arguments'], {})
            except:
                # The bare except is kept on purpose, it also traps
                # what does not derive from Exception
                messages.error(request, _(u'Queue transformation argument error.'))
            else:
                try:
                    # The form excludes the generic relation fields, so
                    # the queue is attached by hand before saving
                    queue_transformation = form.save(commit=False)
                    queue_transformation.content_object = document_queue
                    queue_transformation.save()
                    messages.success(request, _(u'Queue transformation created successfully'))
                    return HttpResponseRedirect(redirect_view)
                except Exception as exception:
                    messages.error(request, _(u'Error creating queue transformation; %s') % exception)
    else:
        form = QueueTransformationForm_create()

    return render_to_response('generic_form.html', {
        'form': form,
        'queue': document_queue,
        'object_name': _(u'document queue'),
        'navigation_object_name': 'queue',
        'title': _(u'Create new transformation for queue: %s') % document_queue,
    }, context_instance=RequestContext(request))

56
apps/sources/__init__.py Normal file
View File

@@ -0,0 +1,56 @@
from django.utils.translation import ugettext_lazy as _
from navigation.api import register_links, \
register_model_list_columns, register_multi_item_links, \
register_sidebar_template
from permissions.api import register_permission, set_namespace_title
from sources.staging import StagingFile
from sources.models import WebForm, StagingFolder, SourceTransformation
# Permissions of the 'sources_setup' namespace; 'label' is the text
# shown to the user
PERMISSION_SOURCES_SETUP_VIEW = {'namespace': 'sources_setup', 'name': 'sources_setup_view', 'label': _(u'View existing document sources')}
PERMISSION_SOURCES_SETUP_EDIT = {'namespace': 'sources_setup', 'name': 'sources_setup_edit', 'label': _(u'Edit document sources')}
PERMISSION_SOURCES_SETUP_DELETE = {'namespace': 'sources_setup', 'name': 'sources_setup_delete', 'label': _(u'Delete document sources')}
PERMISSION_SOURCES_SETUP_CREATE = {'namespace': 'sources_setup', 'name': 'sources_setup_create', 'label': _(u'Create new document sources')}
set_namespace_title('sources_setup', _(u'Sources setup'))
register_permission(PERMISSION_SOURCES_SETUP_VIEW)
register_permission(PERMISSION_SOURCES_SETUP_EDIT)
register_permission(PERMISSION_SOURCES_SETUP_DELETE)
register_permission(PERMISSION_SOURCES_SETUP_CREATE)
staging_file_preview = {'text': _(u'preview'), 'class': 'fancybox-noscaling', 'view': 'staging_file_preview', 'args': ['source.source_type', 'source.pk', 'object.id'], 'famfam': 'zoom'}
staging_file_delete = {'text': _(u'delete'), 'view': 'staging_file_delete', 'args': ['source.source_type', 'source.pk', 'object.id'], 'famfam': 'delete', 'keep_query': True}
setup_web_form_list = {'text': _(u'web forms'), 'view': 'setup_web_form_list', 'famfam': 'application_form', 'children_classes': [WebForm]}
setup_staging_folder_list = {'text': _(u'staging folders'), 'view': 'setup_staging_folder_list', 'famfam': 'folder_magnify', 'children_classes': [StagingFolder]}
setup_source_edit = {'text': _(u'edit'), 'view': 'setup_source_edit', 'args': ['source.source_type', 'source.pk'], 'famfam': 'application_form_edit'}
setup_source_delete = {'text': _(u'delete'), 'view': 'setup_source_delete', 'args': ['source.source_type', 'source.pk'], 'famfam': 'application_form_delete'}
setup_source_create = {'text': _(u'add new'), 'view': 'setup_source_create', 'args': 'source_type', 'famfam': 'application_form_add'}
setup_source_transformation_list = {'text': _(u'transformations'), 'view': 'setup_source_transformation_list', 'args': ['source.source_type', 'source.pk'], 'famfam': 'shape_move_front'}
setup_source_transformation_create = {'text': _(u'add transformation'), 'view': 'setup_source_transformation_create', 'args': ['source.source_type', 'source.pk'], 'famfam': 'shape_square_add'}
setup_source_transformation_edit = {'text': _(u'edit'), 'view': 'setup_source_transformation_edit', 'args': 'transformation.pk', 'famfam': 'shape_square_edit'}
setup_source_transformation_delete = {'text': _(u'delete'), 'view': 'setup_source_transformation_delete', 'args': 'transformation.pk', 'famfam': 'shape_square_delete'}
source_list = {'text': _(u'Document sources'), 'view': 'setup_web_form_list', 'famfam': 'page_add', 'children_url_regex': [r'sources/setup']}
register_links(StagingFile, [staging_file_preview, staging_file_delete])
register_links(SourceTransformation, [setup_source_transformation_edit, setup_source_transformation_delete])
register_links(['setup_web_form_list', 'setup_staging_folder_list', 'setup_source_create'], [setup_web_form_list, setup_staging_folder_list], menu_name='form_header')
register_links(WebForm, [setup_web_form_list, setup_staging_folder_list], menu_name='form_header')
register_links(WebForm, [setup_source_transformation_list, setup_source_edit, setup_source_delete])
register_links(['setup_web_form_list', 'setup_staging_folder_list', 'setup_source_edit', 'setup_source_delete', 'setup_source_create'], [setup_source_create], menu_name='sidebar')
register_links(StagingFolder, [setup_web_form_list, setup_staging_folder_list], menu_name='form_header')
register_links(StagingFolder, [setup_source_transformation_list, setup_source_edit, setup_source_delete])
register_links(['setup_source_transformation_create', 'setup_source_transformation_edit', 'setup_source_transformation_delete', 'setup_source_transformation_list'], [setup_source_transformation_create], menu_name='sidebar')
source_views = ['setup_web_form_list', 'setup_staging_folder_list', 'setup_source_edit', 'setup_source_delete', 'setup_source_create', 'setup_source_transformation_list', 'setup_source_transformation_edit', 'setup_source_transformation_delete', 'setup_source_transformation_create']

8
apps/sources/admin.py Normal file
View File

@@ -0,0 +1,8 @@
from django.contrib import admin
from sources.models import StagingFolder, WebForm, SourceTransformation
# Expose the source models in the Django admin with the default ModelAdmin.
# AdminSite.register() accepts an iterable of models and registers them in
# the order given.
admin.site.register([StagingFolder, WebForm, SourceTransformation])

103
apps/sources/forms.py Normal file
View File

@@ -0,0 +1,103 @@
from django import forms
from django.utils.translation import ugettext_lazy as _
from django.utils.translation import ugettext
from documents.forms import DocumentForm
from sources.models import WebForm, StagingFolder, SourceTransformation
from sources.widgets import FamFamRadioSelect
from sources.utils import validate_whitelist_blacklist
class StagingDocumentForm(DocumentForm):
    """
    Form that shows all the files in the staging folder specified by the
    StagingFile class passed as 'cls' argument

    Extra keyword arguments consumed before the parent form is initialized:

    * cls -- required, class whose get_all() provides the staging files
    * source -- required, the source instance, stored as self.source
    * show_expand -- optional (default False), adds the 'expand' checkbox

    A KeyError is raised if 'cls' or 'source' is not supplied.
    """
    def __init__(self, *args, **kwargs):
        cls = kwargs.pop('cls')
        show_expand = kwargs.pop('show_expand', False)
        self.source = kwargs.pop('source')
        super(StagingDocumentForm, self).__init__(*args, **kwargs)
        try:
            self.fields['staging_file_id'].choices = [
                (staging_file.id, staging_file) for staging_file in cls.get_all()
            ]
        except Exception:
            # Deliberate best effort: if the staging files cannot be listed
            # the form is still rendered, just without any choices.  Only
            # Exception is caught so that SystemExit and KeyboardInterrupt
            # are no longer swallowed as they were by the bare except.
            pass

        if show_expand:
            self.fields['expand'] = forms.BooleanField(
                label=_(u'Expand compressed files'), required=False,
                help_text=ugettext(u'Upload a compressed file\'s contained files as individual documents')
            )

        # Put staging_list field first in the field order list
        staging_list_index = self.fields.keyOrder.index('staging_file_id')
        staging_list = self.fields.keyOrder.pop(staging_list_index)
        self.fields.keyOrder.insert(0, staging_list)

    staging_file_id = forms.ChoiceField(label=_(u'Staging file'))

    class Meta(DocumentForm.Meta):
        exclude = ('description', 'file', 'document_type', 'tags')
class WebFormForm(DocumentForm):
    """
    Document upload form for web form sources.

    Extra keyword arguments consumed before the parent form is initialized:

    * source -- required, the source instance, stored as self.source; its
      whitelist and blacklist are used to validate the uploaded file name
    * show_expand -- optional (default False), adds the 'expand' checkbox
    """
    def __init__(self, *args, **kwargs):
        show_expand = kwargs.pop('show_expand', False)
        self.source = kwargs.pop('source')
        super(WebFormForm, self).__init__(*args, **kwargs)

        if show_expand:
            self.fields['expand'] = forms.BooleanField(
                label=_(u'Expand compressed files'), required=False,
                help_text=ugettext(u'Upload a compressed file\'s contained files as individual documents')
            )

    def clean_file(self):
        """
        Validate the uploaded file's name against the source's comma
        separated whitelist and blacklist; raises ValidationError (from
        validate_whitelist_blacklist) when the name is rejected.
        """
        data = self.cleaned_data['file']
        # ''.split(',') returns [''] and an empty pattern matches every
        # file name, so blank entries must be dropped: otherwise an empty
        # whitelist accepts everything and an empty blacklist rejects
        # everything that is not whitelisted.
        whitelist = [
            pattern.strip() for pattern in self.source.whitelist.split(',')
            if pattern.strip()
        ]
        blacklist = [
            pattern.strip() for pattern in self.source.blacklist.split(',')
            if pattern.strip()
        ]
        validate_whitelist_blacklist(data.name, whitelist, blacklist)
        return data
class WebFormSetupForm(forms.ModelForm):
    """
    Setup form for WebForm sources; the 'icon' field is rendered with
    the FamFamRadioSelect widget.
    """
    class Meta:
        model = WebForm

    def __init__(self, *args, **kwargs):
        super(WebFormSetupForm, self).__init__(*args, **kwargs)
        # Swap the default widget for the icon radio list, carrying over
        # the attrs and choices of the widget being replaced
        icon_field = self.fields['icon']
        stock_widget = icon_field.widget
        icon_field.widget = FamFamRadioSelect(
            attrs=stock_widget.attrs,
            choices=stock_widget.choices,
        )
class StagingFolderSetupForm(forms.ModelForm):
    """
    Setup form for StagingFolder sources; the 'icon' field is rendered
    with the FamFamRadioSelect widget.
    """
    class Meta:
        model = StagingFolder

    def __init__(self, *args, **kwargs):
        super(StagingFolderSetupForm, self).__init__(*args, **kwargs)
        # Swap the default widget for the icon radio list, carrying over
        # the attrs and choices of the widget being replaced
        icon_field = self.fields['icon']
        stock_widget = icon_field.widget
        icon_field.widget = FamFamRadioSelect(
            attrs=stock_widget.attrs,
            choices=stock_widget.choices,
        )
class SourceTransformationForm(forms.ModelForm):
    """
    Form for a SourceTransformation; the two generic relation fields are
    kept in the form but rendered as hidden inputs.
    """
    class Meta:
        model = SourceTransformation

    def __init__(self, *args, **kwargs):
        super(SourceTransformationForm, self).__init__(*args, **kwargs)
        for field_name in ('content_type', 'object_id'):
            self.fields[field_name].widget = forms.HiddenInput()
class SourceTransformationForm_create(forms.ModelForm):
    """
    Form for a SourceTransformation that leaves out the generic relation
    fields entirely.
    """
    class Meta:
        model = SourceTransformation
        exclude = (
            'content_type',
            'object_id',
        )

24
apps/sources/managers.py Normal file
View File

@@ -0,0 +1,24 @@
from django.db import models
from django.contrib.contenttypes.models import ContentType
class SourceTransformationManager(models.Manager):
    """
    Manager with helpers to fetch the transformations attached, through
    the content_type/object_id pair, to a given object.
    """
    def get_for_object(self, obj):
        """
        Return the queryset of transformations whose content type and
        object id match obj.
        """
        content_type = ContentType.objects.get_for_model(obj)
        by_type = self.model.objects.filter(content_type=content_type)
        return by_type.filter(object_id=obj.pk)

    def get_for_object_as_list(self, obj):
        """
        Return a (transformations, warnings) tuple for obj.

        transformations is a list of dictionaries with the keys
        'transformation' and 'arguments' (the evaluated argument string).
        Rows whose arguments fail to evaluate are left out and the
        exception raised is appended to warnings instead.
        """
        parsed = []
        failures = []
        rows = self.get_for_object(obj).values('transformation', 'arguments')
        for row in rows:
            try:
                # NOTE(review): eval() executes whatever expression is
                # stored in the 'arguments' column; anybody able to edit
                # a transformation can run arbitrary code.  Consider
                # ast.literal_eval if only literals need to be supported.
                entry = {
                    'transformation': row['transformation'],
                    'arguments': eval(row['arguments'], {})
                }
            except Exception as exc:
                failures.append(exc)
            else:
                parsed.append(entry)

        return parsed, failures

178
apps/sources/models.py Normal file
View File

@@ -0,0 +1,178 @@
from django.db import models
from django.utils.translation import ugettext_lazy as _
from django.contrib.contenttypes.models import ContentType
from django.contrib.contenttypes import generic
from documents.models import DocumentType
from documents.managers import RecentDocumentManager
from metadata.models import MetadataType
from converter.api import get_available_transformations_choices
from converter.literals import DIMENSION_SEPARATOR
from sources.managers import SourceTransformationManager
# Values stored in the single character 'uncompress' model fields
SOURCE_UNCOMPRESS_CHOICE_Y = 'y'
SOURCE_UNCOMPRESS_CHOICE_N = 'n'
SOURCE_UNCOMPRESS_CHOICE_ASK = 'a'

# Uncompress choices without the 'Ask user' option
SOURCE_UNCOMPRESS_CHOICES = (
    (SOURCE_UNCOMPRESS_CHOICE_Y, _(u'Always')),
    (SOURCE_UNCOMPRESS_CHOICE_N, _(u'Never')),
)

# Uncompress choices for interactive sources, which can also ask the user
SOURCE_INTERACTIVE_UNCOMPRESS_CHOICES = (
    (SOURCE_UNCOMPRESS_CHOICE_Y, _(u'Always')),
    (SOURCE_UNCOMPRESS_CHOICE_N, _(u'Never')),
    (SOURCE_UNCOMPRESS_CHOICE_ASK, _(u'Ask user'))
)

# Icon identifiers selectable for a source
SOURCE_ICON_DISK = 'disk'
SOURCE_ICON_DATABASE = 'database'
SOURCE_ICON_DRIVE = 'drive'
SOURCE_ICON_DRIVE_NETWORK = 'drive_network'
SOURCE_ICON_DRIVE_USER = 'drive_user'
SOURCE_ICON_EMAIL = 'email'
SOURCE_ICON_FOLDER = 'folder'
SOURCE_ICON_WORLD = 'world'
SOURCE_ICON_PRINTER = 'printer'
SOURCE_ICON_PRINTER_EMPTY = 'printer_empty'

# (value, label) pairs used as the choices of the 'icon' model field
SOURCE_ICON_CHOICES = (
    (SOURCE_ICON_DISK, _(u'Disk')),
    (SOURCE_ICON_DATABASE, _(u'Database')),
    (SOURCE_ICON_DRIVE, _(u'Drive')),
    (SOURCE_ICON_DRIVE_NETWORK, _(u'Network drive')),
    (SOURCE_ICON_DRIVE_USER, _(u'User drive')),
    (SOURCE_ICON_EMAIL, _(u'Envelope')),
    (SOURCE_ICON_FOLDER, _(u'Folder')),
    (SOURCE_ICON_WORLD, _(u'World')),
    (SOURCE_ICON_PRINTER, _(u'Printer')),
    (SOURCE_ICON_PRINTER_EMPTY, _(u'Empty printer')),
)

# Source type identifiers; each source model sets one as its 'source_type'
SOURCE_CHOICE_WEB_FORM = 'webform'
SOURCE_CHOICE_STAGING = 'staging'

# Singular display names, looked up by source type
SOURCE_CHOICES = (
    (SOURCE_CHOICE_WEB_FORM, _(u'web form')),
    (SOURCE_CHOICE_STAGING, _(u'server staging folder')),
)

# Plural display names, looked up by source type
SOURCE_CHOICES_PLURAL = (
    (SOURCE_CHOICE_WEB_FORM, _(u'web forms')),
    (SOURCE_CHOICE_STAGING, _(u'server staging folders')),
)
class BaseModel(models.Model):
    """
    Abstract base for every document source model.

    Concrete subclasses must define a 'source_type' class attribute (one
    of the SOURCE_CHOICE_* values); class_fullname() and
    class_fullname_plural() use it to look up the display name.
    """
    title = models.CharField(max_length=64, verbose_name=_(u'title'))
    enabled = models.BooleanField(default=True, verbose_name=_(u'enabled'))
    whitelist = models.TextField(blank=True, verbose_name=_(u'whitelist'))
    blacklist = models.TextField(blank=True, verbose_name=_(u'blacklist'))
    # help_text is wrapped in _() like every other user facing string of
    # this module so that it can be translated
    document_type = models.ForeignKey(DocumentType, blank=True, null=True, verbose_name=_(u'document type'), help_text=_(u'Optional document type to be applied to documents uploaded from this source.'))

    def __unicode__(self):
        return u'%s' % self.title

    def fullname(self):
        """Return the source type display name followed by the quoted title."""
        return u' '.join([self.class_fullname(), '"%s"' % self.title])

    @classmethod
    def class_fullname(cls):
        """Return the singular display name for this class' source_type."""
        return unicode(dict(SOURCE_CHOICES).get(cls.source_type))

    @classmethod
    def class_fullname_plural(cls):
        """Return the plural display name for this class' source_type."""
        return unicode(dict(SOURCE_CHOICES_PLURAL).get(cls.source_type))

    class Meta:
        ordering = ('title',)
        abstract = True
class InteractiveBaseModel(BaseModel):
    """
    Abstract base for sources that have an icon.

    Concrete subclasses must define a 'default_icon' class attribute; it
    is stored in 'icon' when an instance is saved without one.
    """
    icon = models.CharField(blank=True, null=True, max_length=24, choices=SOURCE_ICON_CHOICES, verbose_name=_(u'icon'), help_text=_(u'An icon to visually distinguish this source.'))

    def save(self, *args, **kwargs):
        """Save the instance, falling back to the class' default icon."""
        if not self.icon:
            self.icon = self.default_icon
        # super() must name this class: naming BaseModel started the
        # method lookup after BaseModel and would silently skip any
        # save() defined there
        super(InteractiveBaseModel, self).save(*args, **kwargs)

    class Meta(BaseModel.Meta):
        abstract = True
class StagingFolder(InteractiveBaseModel):
    """
    Document source backed by a folder on the server's filesystem.
    """
    is_interactive = True
    source_type = SOURCE_CHOICE_STAGING
    default_icon = SOURCE_ICON_DRIVE

    folder_path = models.CharField(max_length=255, verbose_name=_(u'folder path'), help_text=_(u'Server side filesystem path.'))
    preview_width = models.IntegerField(blank=True, null=True, verbose_name=_(u'preview width'), help_text=_(u'Width value to be passed to the converter backend.'))
    preview_height = models.IntegerField(blank=True, null=True, verbose_name=_(u'preview height'), help_text=_(u'Height value to be passed to the converter backend.'))
    uncompress = models.CharField(max_length=1, choices=SOURCE_INTERACTIVE_UNCOMPRESS_CHOICES, verbose_name=_(u'uncompress'), help_text=_(u'Whether to expand or not compressed archives.'))
    delete_after_upload = models.BooleanField(default=True, verbose_name=_(u'delete after upload'), help_text=_(u'Delete the file after it has been successfully uploaded.'))

    def get_preview_size(self):
        """
        Return the preview dimensions as a string: the width, followed by
        DIMENSION_SEPARATOR and the height when a height is set.
        """
        # NOTE(review): preview_width is nullable; when it is None the
        # result starts with the text u'None' -- confirm how the converter
        # handles that before relying on an empty width.
        dimensions = []
        dimensions.append(unicode(self.preview_width))
        if self.preview_height:
            dimensions.append(unicode(self.preview_height))

        return DIMENSION_SEPARATOR.join(dimensions)

    class Meta(InteractiveBaseModel.Meta):
        verbose_name = _(u'staging folder')
        verbose_name_plural = _(u'staging folders')
'''
class SourceMetadata(models.Model):
content_type = models.ForeignKey(ContentType)
object_id = models.PositiveIntegerField()
content_object = generic.GenericForeignKey('content_type', 'object_id')
metadata_type = models.ForeignKey(MetadataType, verbose_name=_(u'metadata type'))
value = models.CharField(max_length=256, blank=True, verbose_name=_(u'value'))
def __unicode__(self):
return self.source
class Meta:
verbose_name = _(u'source metadata')
verbose_name_plural = _(u'sources metadata')
'''
class WebForm(InteractiveBaseModel):
    """
    Document source of the 'web form' type.
    """
    source_type = SOURCE_CHOICE_WEB_FORM
    default_icon = SOURCE_ICON_DISK
    is_interactive = True

    uncompress = models.CharField(
        max_length=1,
        choices=SOURCE_INTERACTIVE_UNCOMPRESS_CHOICES,
        verbose_name=_(u'uncompress'),
        help_text=_(u'Whether to expand or not compressed archives.')
    )
    # Default path

    class Meta(InteractiveBaseModel.Meta):
        verbose_name = _(u'web form')
        verbose_name_plural = _(u'web forms')
class SourceTransformation(models.Model):
    """
    Model that stores the transformation and transformation arguments
    for a given document source
    """
    # Generic relation to the source instance the transformation belongs to
    content_type = models.ForeignKey(ContentType)
    object_id = models.PositiveIntegerField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')
    order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_(u'order'), db_index=True)
    # The available choices are computed once, when this module is imported
    transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_(u'transformation'))
    arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to identify arguments, example: %s') % u'{\'degrees\':90}')

    objects = SourceTransformationManager()

    def __unicode__(self):
        #return u'"%s" for %s' % (self.get_transformation_display(), unicode(self.content_object))
        return self.get_transformation_display()

    class Meta:
        ordering = ('order',)
        verbose_name = _(u'document source transformation')
        verbose_name_plural = _(u'document source transformations')

View File

@@ -8,35 +8,27 @@ from django.utils.translation import ugettext
from django.contrib import messages
from django.utils.translation import ugettext_lazy as _
from converter import TRANFORMATION_CHOICES
from converter.api import convert, cache_cleanup
from documents.conf.settings import STAGING_DIRECTORY
from documents.conf.settings import DEFAULT_TRANSFORMATIONS
from documents.conf.settings import STAGING_FILES_PREVIEW_SIZE
from documents.conf.settings import USER_STAGING_DIRECTORY_ROOT
from documents.conf.settings import USER_STAGING_DIRECTORY_EXPRESSION
from documents.literals import UPLOAD_SOURCE_STAGING, \
UPLOAD_SOURCE_USER_STAGING
DEFAULT_STAGING_DIRECTORY = u'/tmp'
HASH_FUNCTION = lambda x: hashlib.sha256(x).hexdigest()
#TODO: Do benchmarks
#func = lambda:[StagingFile.get_all() is None for i in range(100)]
#t1=time.time();func();t2=time.time();print '%s took %0.3f ms' % (func.func_name, (t2-t1)*1000.0)
STAGING_FILE_FUNCTIONS = {
UPLOAD_SOURCE_STAGING: lambda x: STAGING_DIRECTORY,
UPLOAD_SOURCE_USER_STAGING: lambda x: os.path.join(USER_STAGING_DIRECTORY_ROOT, eval(USER_STAGING_DIRECTORY_EXPRESSION, {'user': x.user}))
}
#STAGING_FILE_FUNCTIONS = {
# UPLOAD_SOURCE_STAGING: lambda x: STAGING_DIRECTORY,
# UPLOAD_SOURCE_USER_STAGING: lambda x: os.path.join(USER_STAGING_DIRECTORY_ROOT, eval(USER_STAGING_DIRECTORY_EXPRESSION, {'user': x.user}))
#}
def evaluate_user_staging_path(request, source):
try:
return STAGING_FILE_FUNCTIONS[source](request)
except Exception, exc:
messages.error(request, _(u'Error evaluating user staging directory expression; %s') % exc)
return u''
#def evaluate_user_staging_path(request, source):
# try:
# return STAGING_FILE_FUNCTIONS[source](request)
# except Exception, exc:
# messages.error(request, _(u'Error evaluating user staging directory expression; %s') % exc)
# return u''
def get_all_files(path):
@@ -52,7 +44,8 @@ def _return_new_class():
def create_staging_file_class(request, source):
cls = _return_new_class()
cls.set_path(evaluate_user_staging_path(request, source))
#cls.set_path(evaluate_user_staging_path(request, source))
cls.set_path(source)
return cls
@@ -61,7 +54,7 @@ class StagingFile(object):
Simple class to encapsulate the files in a directory and hide the
specifics to the view
"""
path = STAGING_DIRECTORY
path = DEFAULT_STAGING_DIRECTORY
@classmethod
def set_path(cls, path):
@@ -112,16 +105,15 @@ class StagingFile(object):
def upload(self):
"""
Return a StagingFile encapsulated in a File class instance to
allow for easier upload a staging files
allow for easier upload of staging files
"""
try:
return File(file(self.filepath, 'rb'), name=self.filename)
except Exception, exc:
raise Exception(ugettext(u'Unable to upload staging file: %s') % exc)
def delete(self):
tranformation_string, errors = get_transformation_string(DEFAULT_TRANSFORMATIONS)
cache_cleanup(self.filepath, size=STAGING_FILES_PREVIEW_SIZE, extra_options=tranformation_string)
def delete(self, preview_size, transformations):
cache_cleanup(self.filepath, size=preview_size, transformations=transformations)
try:
os.unlink(self.filepath)
except OSError, exc:
@@ -130,22 +122,7 @@ class StagingFile(object):
else:
raise OSError(ugettext(u'Unable to delete staging file: %s') % exc)
def preview(self):
tranformation_string, errors = get_transformation_string(DEFAULT_TRANSFORMATIONS)
output_file = convert(self.filepath, size=STAGING_FILES_PREVIEW_SIZE, extra_options=tranformation_string, cleanup_files=False)
def preview(self, preview_size, transformations):
errors = []
output_file = convert(self.filepath, size=preview_size, cleanup_files=False, transformations=transformations)
return output_file, errors
def get_transformation_string(transformations):
transformation_list = []
errors = []
for transformation in transformations:
try:
if transformation['name'] in TRANFORMATION_CHOICES:
output = TRANFORMATION_CHOICES[transformation['name']] % eval(transformation['arguments'])
transformation_list.append(output)
except Exception, e:
errors.append(e)
tranformation_string = ' '.join(transformation_list)
return tranformation_string, errors

23
apps/sources/tests.py Normal file
View File

@@ -0,0 +1,23 @@
"""
This file demonstrates two different styles of tests (one doctest and one
unittest). These will both pass when you run "manage.py test".
Replace these with more appropriate tests for your application.
"""
from django.test import TestCase
class SimpleTest(TestCase):
    """Placeholder test case generated with the application skeleton."""
    def test_basic_addition(self):
        """
        Tests that 1 + 1 always equals 2.
        """
        # assertEqual replaces the deprecated failUnlessEqual alias
        self.assertEqual(1 + 1, 2)
# Module level doctest placeholder; the string below is the doctest source
# registered under the name "doctest".
__test__ = {"doctest": """
Another way to test that 1 + 1 is equal to 2.
>>> 1 + 1 == 2
True
"""}

27
apps/sources/urls.py Normal file
View File

@@ -0,0 +1,27 @@
from django.conf.urls.defaults import patterns, url
from sources.models import SOURCE_CHOICE_WEB_FORM, SOURCE_CHOICE_STAGING
# URL routes of the sources app; every view name is resolved relative to
# the 'sources.views' module.
urlpatterns = patterns('sources.views',
    # Staging file actions, addressed by source type, source id and file id
    url(r'^staging_file/type/(?P<source_type>\w+)/(?P<source_id>\d+)/(?P<staging_file_id>\w+)/preview/$', 'staging_file_preview', (), 'staging_file_preview'),
    url(r'^staging_file/type/(?P<source_type>\w+)/(?P<source_id>\d+)/(?P<staging_file_id>\w+)/delete/$', 'staging_file_delete', (), 'staging_file_delete'),

    # Interactive upload; the same view and pattern name are registered
    # with and without an explicit source
    url(r'^upload/interactive/(?P<source_type>\w+)/(?P<source_id>\d+)/$', 'upload_interactive', (), 'upload_interactive'),
    url(r'^upload/interactive/$', 'upload_interactive', (), 'upload_interactive'),

    # Setup views
    # The first two list routes pass a fixed source_type to the view; the
    # third takes it from the URL
    url(r'^setup/interactive/webforms/list/$', 'setup_source_list', {'source_type': SOURCE_CHOICE_WEB_FORM}, 'setup_web_form_list'),
    url(r'^setup/interactive/staging_folder/list/$', 'setup_source_list', {'source_type': SOURCE_CHOICE_STAGING}, 'setup_staging_folder_list'),
    url(r'^setup/interactive/(?P<source_type>\w+)/list/$', 'setup_source_list', (), 'setup_source_list'),
    url(r'^setup/interactive/(?P<source_type>\w+)/(?P<source_id>\d+)/edit/$', 'setup_source_edit', (), 'setup_source_edit'),
    url(r'^setup/interactive/(?P<source_type>\w+)/(?P<source_id>\d+)/delete/$', 'setup_source_delete', (), 'setup_source_delete'),
    url(r'^setup/interactive/(?P<source_type>\w+)/create/$', 'setup_source_create', (), 'setup_source_create'),

    # Transformations: list and create are addressed through the owning
    # source, edit and delete through the transformation's own id
    url(r'^setup/interactive/(?P<source_type>\w+)/(?P<source_id>\d+)/transformation/list/$', 'setup_source_transformation_list', (), 'setup_source_transformation_list'),
    url(r'^setup/interactive/(?P<source_type>\w+)/(?P<source_id>\d+)/transformation/create/$', 'setup_source_transformation_create', (), 'setup_source_transformation_create'),
    url(r'^setup/interactive/source/transformation/(?P<transformation_id>\d+)/edit/$', 'setup_source_transformation_edit', (), 'setup_source_transformation_edit'),
    url(r'^setup/interactive/source/transformation/(?P<transformation_id>\d+)/delete/$', 'setup_source_transformation_delete', (), 'setup_source_transformation_delete'),
)

37
apps/sources/utils.py Normal file
View File

@@ -0,0 +1,37 @@
import re
from django.core.exceptions import ValidationError
from django.utils.translation import ugettext
# From http://www.peterbe.com/plog/whitelist-blacklist-logic
def accept_item(value, whitelist, blacklist, default_accept=True):
    """
    Return True if this item is either whitelisted or not blacklisted.

    value is the string to check.  whitelist and blacklist are iterables
    of patterns (None is treated as empty); every '*' in a pattern is
    replaced with the regular expression '\\S+' and the result is searched
    for, ignoring case, anywhere in value.  Blank patterns are ignored.

    The whitelist is consulted first, so a whitelist match wins over a
    blacklist match.  default_accept is returned when nothing matches.

    NOTE(review): other than '*', patterns are used as regular expressions
    unchanged, so '.' matches any character and an invalid expression
    raises re.error.
    """
    if not whitelist:
        whitelist = []

    if not blacklist:
        blacklist = []

    # note the order
    for reject, item_list in ([False, whitelist], [True, blacklist]):
        for okpattern in item_list:
            okpattern = okpattern.strip()
            if not okpattern:
                # ''.split(',') produces [''] and an empty regular
                # expression matches every value; a blank entry must
                # never count as a match
                continue

            if re.search(okpattern.replace('*', r'\S+'), value, re.I):
                # match!
                return not reject

    # default is to accept all
    return default_accept
def validate_whitelist_blacklist(value, whitelist, blacklist):
    """
    Raise ValidationError if value is not accepted by accept_item() for
    the given whitelist and blacklist; return None otherwise.
    """
    if not accept_item(value, whitelist, blacklist):
        raise ValidationError(ugettext(u'Whitelist Blacklist validation error.'))

644
apps/sources/views.py Normal file

File diff suppressed because it is too large Load Diff

22
apps/sources/widgets.py Normal file
View File

@@ -0,0 +1,22 @@
from django import forms
from django.utils.safestring import mark_safe
from django.utils.encoding import force_unicode
class FamFamRadioFieldRenderer(forms.widgets.RadioFieldRenderer):
    """
    Radio renderer that puts a famfam icon in front of every choice.
    """
    def render(self):
        """
        Return the choices as an unordered list marked safe for output;
        the icon CSS class is built from each choice's value and choices
        with an empty value get the 'cross' icon.
        """
        pieces = [u'<ul>\n']
        for radio in self:
            if not radio.choice_value:
                icon = u'<span class="famfam active famfam-cross" style="vertical-align: bottom;"></span>'
            else:
                icon = u'<span class="famfam active famfam-%s" style="vertical-align: bottom;"></span>' % radio.choice_value
            pieces.append(u'<li class="undecorated_list">%s%s</li>' % (icon, force_unicode(radio)))

        pieces.append(u'\n</ul>')
        return mark_safe(u'\n'.join(pieces))
class FamFamRadioSelect(forms.widgets.RadioSelect):
    """
    RadioSelect widget whose choices are drawn by
    FamFamRadioFieldRenderer.
    """
    renderer = FamFamRadioFieldRenderer

View File

@@ -10,3 +10,5 @@ django-sentry==1.6.0
django-taggit==0.9.3
-e git://github.com/django-mptt/django-mptt.git@0af02a95877041b2fd6d458bd95413dc1666c321#egg=django-mptt
-e git://github.com/ahupp/python-magic.git@a75cf0a4a7790eb106155c947af9612f15693b6e#egg=python-magic
slate==0.3
PIL==1.1.7

View File

@@ -7,3 +7,5 @@ django-sentry==1.6.0
django-taggit==0.9.3
-e git://github.com/django-mptt/django-mptt.git@0af02a95877041b2fd6d458bd95413dc1666c321#egg=django-mptt
-e git://github.com/ahupp/python-magic.git@a75cf0a4a7790eb106155c947af9612f15693b6e#egg=python-magic
slate==0.3
PIL==1.1.7

View File

@@ -149,6 +149,7 @@ INSTALLED_APPS = (
'grouping',
'mptt',
'document_indexing',
'sources',
)
TEMPLATE_CONTEXT_PROCESSORS = (

Binary file not shown.

After

Width:  |  Height:  |  Size: 785 B

View File

@@ -25,6 +25,7 @@ urlpatterns = patterns('',
(r'^document_indexing/', include('document_indexing.urls')),
(r'^history/', include('history.urls')),
(r'^converter/', include('converter.urls')),
(r'^sources/', include('sources.urls')),
)