Initial version of the GridFS storage driver

This commit is contained in:
Roberto Rosario
2011-03-04 01:08:20 -04:00
parent 3f71ee1a06
commit d0bea8ffeb
13 changed files with 218 additions and 183 deletions

View File

@@ -0,0 +1,5 @@
import tempfile
from common.conf import settings as common_settings
# Use the configured temporary directory when the setting is non-empty;
# otherwise create a fresh private directory at import time.
TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY or tempfile.mkdtemp()

View File

@@ -1,3 +1,4 @@
import os
import types import types
from django.utils.http import urlquote as django_urlquote from django.utils.http import urlquote as django_urlquote
@@ -75,3 +76,13 @@ def pretty_size(size):
continue continue
else: else:
return round(size/float(lim/2**10),2).__str__()+suf return round(size/float(lim/2**10),2).__str__()+suf
def exists_with_famfam(path):
    """Return a famfam icon ``<span>`` telling whether ``path`` exists.

    A tick icon is returned when ``os.path.exists(path)`` is true and a
    cross icon otherwise.  If the check itself raises, the exception
    instance is returned in place of the markup so the caller can show it.
    """
    try:
        if os.path.exists(path):
            return '<span class="famfam active famfam-tick"></span>'
        return '<span class="famfam active famfam-cross"></span>'
    except Exception as exc:
        # "except ... as" parses on Python 2.6+ and on Python 3; the older
        # comma form is a syntax error on Python 3.
        return exc

View File

@@ -1,16 +1,3 @@
import tempfile
from common.conf import settings as common_settings
TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
#ugettext = lambda s: s
#TRANFORMATION_ROTATE = (u'-rotate %(degrees)d', ugettext(u'Rotation, arguments: degrees'))
TRANFORMATION_CHOICES = { TRANFORMATION_CHOICES = {
'rotate':'-rotate %(degrees)d' 'rotate':'-rotate %(degrees)d'
} }
#getattr(settings, 'CONVERTER_TRANSFORMATION_LIST', [
# TRANFORMATION_ROTATE,
# ])

View File

@@ -15,10 +15,9 @@ from converter.conf.settings import LOW_QUALITY_OPTIONS
from converter.conf.settings import HIGH_QUALITY_OPTIONS from converter.conf.settings import HIGH_QUALITY_OPTIONS
#from converter.conf.settings import UNOCONV_PATH #from converter.conf.settings import UNOCONV_PATH
from common import TEMPORARY_DIRECTORY
from converter import TEMPORARY_DIRECTORY, TRANFORMATION_CHOICES from converter import TRANFORMATION_CHOICES
from utils import from_descriptor_to_tempfile from documents.utils import document_save_to_temp_dir
QUALITY_DEFAULT = 'quality_default' QUALITY_DEFAULT = 'quality_default'
QUALITY_LOW = 'quality_low' QUALITY_LOW = 'quality_low'
@@ -175,9 +174,7 @@ def get_page_count(input_filepath):
#TODO: slugify OCR_OPTIONS and add to file name to cache #TODO: slugify OCR_OPTIONS and add to file name to cache
def convert_document_for_ocr(document, page=0, format='tif'): def convert_document_for_ocr(document, page=0, format='tif'):
#Extract document file #Extract document file
document.file.open() input_filepath = document_save_to_temp_dir(document, document.uuid)
desc = document.file.storage.open(document.file.path)
input_filepath = from_descriptor_to_tempfile(desc, document.uuid)
#Convert for OCR #Convert for OCR
temp_filename, separator = os.path.splitext(os.path.basename(input_filepath)) temp_filename, separator = os.path.splitext(os.path.basename(input_filepath))

View File

@@ -1,7 +1,7 @@
import os import os
import tempfile import tempfile
from converter import TEMPORARY_DIRECTORY from common import TEMPORARY_DIRECTORY
#http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python #http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python
def copyfile(source, dest, buffer_size=1024*1024): def copyfile(source, dest, buffer_size=1024*1024):
@@ -24,36 +24,3 @@ def copyfile(source, dest, buffer_size=1024*1024):
source.close() source.close()
dest.close() dest.close()
def from_descriptor_to_tempfile(input_descriptor, filename, buffer_size=1024*1024):
path = os.path.join(TEMPORARY_DIRECTORY, filename)
output_descriptor = open(path, 'wb')
while 1:
copy_buffer = input_descriptor.read(buffer_size)
if copy_buffer:
output_descriptor.write(copy_buffer)
else:
break
input_descriptor.close()
output_descriptor.close()
return path
def from_descriptor_to_new_tempfile(input_descriptor, buffer_size=1024*1024):
output_descriptor, tmp_filename = tempfile.mkstemp()
while 1:
copy_buffer = input_descriptor.read(buffer_size)
if copy_buffer:
#output_descriptor.write(copy_buffer)
os.write(output_descriptor, copy_buffer)
else:
break
input_descriptor.close()
os.close(output_descriptor)
return tmp_filename

View File

@@ -13,7 +13,6 @@ from models import Document, DocumentPage, DocumentPageTransformation
from staging import StagingFile from staging import StagingFile
from common.conf import settings as common_settings from common.conf import settings as common_settings
from conf.settings import ENABLE_SINGLE_DOCUMENT_UPLOAD from conf.settings import ENABLE_SINGLE_DOCUMENT_UPLOAD
PERMISSION_DOCUMENT_CREATE = 'document_create' PERMISSION_DOCUMENT_CREATE = 'document_create'
@@ -75,6 +74,18 @@ register_links(['document_page_view', 'document_page_transformation_edit', 'docu
register_links(StagingFile, [staging_file_preview, staging_file_delete]) register_links(StagingFile, [staging_file_preview, staging_file_delete])
def document_exists(document):
    """Return a famfam icon ``<span>`` telling whether ``document`` exists.

    ``document.exists()`` decides between the tick and the cross icon.  If
    that call raises, the exception instance is returned in place of the
    markup so the list column can show the error.
    """
    try:
        if document.exists():
            return '<span class="famfam active famfam-tick"></span>'
        return '<span class="famfam active famfam-cross"></span>'
    except Exception as exc:
        # "except ... as" parses on Python 2.6+ and on Python 3; the older
        # comma form is a syntax error on Python 3.
        return exc
register_model_list_columns(Document, [ register_model_list_columns(Document, [
{'name':_(u'thumbnail'), 'attribute': {'name':_(u'thumbnail'), 'attribute':
lambda x: '<a class="fancybox" href="%s"><img src="%s" /></a>' % (reverse('document_preview', args=[x.id]), lambda x: '<a class="fancybox" href="%s"><img src="%s" /></a>' % (reverse('document_preview', args=[x.id]),
@@ -83,7 +94,9 @@ register_model_list_columns(Document, [
{'name':_(u'metadata'), 'attribute': {'name':_(u'metadata'), 'attribute':
lambda x: ', '.join(['%s - %s' %(metadata.metadata_type, metadata.value) for metadata in x.documentmetadata_set.all()]) lambda x: ', '.join(['%s - %s' %(metadata.metadata_type, metadata.value) for metadata in x.documentmetadata_set.all()])
}, },
{'name':_(u'exists'), 'attribute':
lambda x: document_exists(x)
},
]) ])
if ENABLE_SINGLE_DOCUMENT_UPLOAD: if ENABLE_SINGLE_DOCUMENT_UPLOAD:
@@ -96,5 +109,3 @@ else:
{'text':_('documents'), 'view':'document_create_multiple', 'links':[ {'text':_('documents'), 'view':'document_create_multiple', 'links':[
document_create_multiple, document_list document_create_multiple, document_list
],'famfam':'page','position':1}]) ],'famfam':'page','position':1}])
TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()

View File

@@ -0,0 +1,54 @@
from urllib import unquote_plus
from django.shortcuts import get_object_or_404
from django.core.exceptions import ObjectDoesNotExist
from models import DocumentMetadata, MetadataType
def decode_metadata_from_url(url_dict):
    """Pair up ``metadata<N>_id`` / ``metadata<N>_value`` request parameters.

    ``url_dict`` maps parameter names to values.  Every name of the form
    ``metadata<index>_id`` or ``metadata<index>_value`` is collected, and
    one ``{'id': ..., 'value': ...}`` dictionary is returned for each index
    that has both an id and a value.  Parameters that do not follow that
    pattern are ignored instead of raising.
    """
    prefix = 'metadata'
    metadata_dict = {
        'id': {},
        'value': {},
    }

    # Match out of order metadata_type ids with metadata values from request
    for key, value in url_dict.items():
        # Only keys that *start* with the prefix may be sliced by its length;
        # a key that merely contains it would be cut at the wrong offset.
        if not key.startswith(prefix):
            continue
        index, separator, element = key[len(prefix):].partition('_')
        if separator and element in metadata_dict:
            metadata_dict[element][index] = value

    # Convert the nested dictionary into a list of id+values dictionaries
    values_by_index = metadata_dict['value']
    return [
        {'id': metadata_id, 'value': values_by_index[index]}
        for index, metadata_id in metadata_dict['id'].items()
        if index in values_by_index
    ]
def save_metadata_list(metadata_list, document):
    """Apply a list of ``{'id': ..., 'value': ...}`` entries to ``document``.

    Entries with a truthy value are handed to ``save_metadata``.  Entries
    with an empty value remove the document's existing metadata of that
    type; a missing metadata type or missing row is silently skipped.
    """
    for entry in metadata_list:
        if entry['value']:
            save_metadata(entry, document)
            continue

        # Empty value: drop the stored metadata for this type, if any.
        try:
            entry_type = MetadataType.objects.get(id=entry['id'])
            stale_row = DocumentMetadata.objects.get(
                document=document, metadata_type=entry_type)
            stale_row.delete()
        except ObjectDoesNotExist:
            pass
def save_metadata(metadata_dict, document):
    """Create or update one metadata row of ``document``.

    ``metadata_dict`` carries the metadata type primary key under ``'id'``
    and the URL-quoted value under ``'value'``.  The type is looked up with
    ``get_object_or_404``; the matching row is fetched or created, its value
    is replaced with the unquoted text and the row is saved.
    """
    # Use matched metadata now to create document metadata
    metadata_type = get_object_or_404(MetadataType, pk=metadata_dict['id'])
    lookup_result = DocumentMetadata.objects.get_or_create(
        document=document,
        metadata_type=metadata_type,
    )
    document_metadata = lookup_result[0]

    # unquote_plus turns the 'plus sign as space' URL encoding back into text.
    # NOTE(review): whether the result is correctly decoded UTF-8 is unverified;
    # see http://stackoverflow.com/questions/4382875/handling-iri-in-django
    raw_value = metadata_dict['value']
    document_metadata.value = unquote_plus(raw_value)
    document_metadata.save()

View File

@@ -19,7 +19,6 @@ from documents.conf.settings import STORAGE_BACKEND
from documents.conf.settings import AVAILABLE_TRANSFORMATIONS from documents.conf.settings import AVAILABLE_TRANSFORMATIONS
from documents.conf.settings import DEFAULT_TRANSFORMATIONS from documents.conf.settings import DEFAULT_TRANSFORMATIONS
def get_filename_from_uuid(instance, filename): def get_filename_from_uuid(instance, filename):
filename, extension = os.path.splitext(filename) filename, extension = os.path.splitext(filename)
instance.file_filename = filename instance.file_filename = filename
@@ -80,9 +79,9 @@ class Document(models.Model):
def update_mimetype(self, save=True): def update_mimetype(self, save=True):
try: try:
mime = magic.Magic(mime=True) mime = magic.Magic(mime=True)
self.file_mimetype = mime.from_buffer(self.read()) self.file_mimetype = mime.from_buffer(self.open().read())
mime_encoding = magic.Magic(mime_encoding=True) mime_encoding = magic.Magic(mime_encoding=True)
self.file_mime_encoding = mime_encoding.from_buffer(self.read()) self.file_mime_encoding = mime_encoding.from_buffer(self.open().read())
except: except:
self.file_mimetype = u'unknown' self.file_mimetype = u'unknown'
self.file_mime_encoding = u'unknown' self.file_mime_encoding = u'unknown'
@@ -119,8 +118,8 @@ class Document(models.Model):
def save_to_file(self, filepath, buffer_size=1024*1024): def save_to_file(self, filepath, buffer_size=1024*1024):
input_descriptor = self.open() input_descriptor = self.open()
output_descriptor = open(filepath, 'wb') output_descriptor = open(filepath, 'wb')
while 1: while True:
copy_buffer = input_descriptor.read() copy_buffer = input_descriptor.read(buffer_size)
if copy_buffer: if copy_buffer:
output_descriptor.write(copy_buffer) output_descriptor.write(copy_buffer)
else: else:

View File

@@ -1,14 +1,9 @@
import os import os
import tempfile import tempfile
from urllib import unquote_plus
from django.shortcuts import get_object_or_404
from django.core.exceptions import ObjectDoesNotExist
from documents import TEMPORARY_DIRECTORY from common import TEMPORARY_DIRECTORY
from models import Document, DocumentMetadata, MetadataType
#http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python #http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python
def copyfile(source, dest, buffer_size=1024*1024): def copyfile(source, dest, buffer_size=1024*1024):
@@ -22,7 +17,7 @@ def copyfile(source, dest, buffer_size=1024*1024):
if not hasattr(dest, 'write'): if not hasattr(dest, 'write'):
dest = open(dest, 'wb') dest = open(dest, 'wb')
while 1: while True:
copy_buffer = source.read(buffer_size) copy_buffer = source.read(buffer_size)
if copy_buffer: if copy_buffer:
dest.write(copy_buffer) dest.write(copy_buffer)
@@ -33,82 +28,8 @@ def copyfile(source, dest, buffer_size=1024*1024):
dest.close() dest.close()
def from_descriptor_to_tempfile(input_descriptor, filename, buffer_size=1024*1024): def document_save_to_temp_dir(document, filename, buffer_size=1024*1024):
path = os.path.join(TEMPORARY_DIRECTORY, filename) temporary_path = os.path.join(TEMPORARY_DIRECTORY, filename)
return document.save_to_file(temporary_path, buffer_size)
output_descriptor = open(path, 'wb')
while 1:
copy_buffer = input_descriptor.read(buffer_size)
if copy_buffer:
output_descriptor.write(copy_buffer)
else:
break
input_descriptor.close()
output_descriptor.close()
return path
def from_descriptor_to_new_tempfile(input_descriptor, buffer_size=1024*1024):
output_descriptor, tmp_filename = tempfile.mkstemp()
while 1:
copy_buffer = input_descriptor.read(buffer_size)
if copy_buffer:
#output_descriptor.write(copy_buffer)
os.write(output_descriptor, copy_buffer)
else:
break
input_descriptor.close()
os.close(output_descriptor)
return tmp_filename
def decode_metadata_from_url(url_dict):
metadata_dict = {
'id':{},
'value':{}
}
metadata_list = []
#Match out of order metadata_type ids with metadata values from request
for key, value in url_dict.items():
if 'metadata' in key:
index, element = key[8:].split('_')
metadata_dict[element][index] = value
#Convert the nested dictionary into a list of id+values dictionaries
for order, id in metadata_dict['id'].items():
if order in metadata_dict['value'].keys():
metadata_list.append({'id':id, 'value':metadata_dict['value'][order]})
return metadata_list
def save_metadata_list(metadata_list, document):
for item in metadata_list:
if item['value']:
save_metadata(item, document)
else:
try:
metadata_type = MetadataType.objects.get(id=item['id'])
document_metadata = DocumentMetadata.objects.get(document=document,
metadata_type=metadata_type)
document_metadata.delete()
except ObjectDoesNotExist:
pass
def save_metadata(metadata_dict, document):
#Use matched metadata now to create document metadata
document_metadata, created = DocumentMetadata.objects.get_or_create(
document=document,
metadata_type=get_object_or_404(MetadataType, pk=metadata_dict['id']),
)
#Handle 'plus sign as space' in the url
#unquote_plus handles utf-8?!?
#http://stackoverflow.com/questions/4382875/handling-iri-in-django
document_metadata.value=unquote_plus(metadata_dict['value'])#.decode('utf-8')
document_metadata.save()

View File

@@ -16,26 +16,14 @@ from django.core.exceptions import ObjectDoesNotExist
from django.core.files.uploadedfile import SimpleUploadedFile from django.core.files.uploadedfile import SimpleUploadedFile
from common.utils import pretty_size from common.utils import pretty_size
from permissions.api import check_permissions, Unauthorized
from filetransfers.api import serve_file
from converter.api import convert, in_image_cache, QUALITY_DEFAULT from converter.api import convert, in_image_cache, QUALITY_DEFAULT
from converter import TRANFORMATION_CHOICES from converter import TRANFORMATION_CHOICES
from filetransfers.api import serve_file
from filesystem_serving.api import document_create_fs_links, document_delete_fs_links from filesystem_serving.api import document_create_fs_links, document_delete_fs_links
from filesystem_serving.conf.settings import FILESERVING_ENABLE
from utils import from_descriptor_to_tempfile
from models import Document, DocumentMetadata, DocumentType, MetadataType, \
DocumentPage, DocumentPageTransformation
from forms import DocumentTypeSelectForm, DocumentCreateWizard, \
MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \
StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \
MetadataFormSet, DocumentPageForm, DocumentPageTransformationForm
from staging import StagingFile
from ocr.models import add_document_to_queue from ocr.models import add_document_to_queue
from permissions.api import check_permissions, Unauthorized
from documents.conf.settings import DELETE_STAGING_FILE_AFTER_UPLOAD from documents.conf.settings import DELETE_STAGING_FILE_AFTER_UPLOAD
from documents.conf.settings import USE_STAGING_DIRECTORY from documents.conf.settings import USE_STAGING_DIRECTORY
@@ -49,16 +37,23 @@ from documents.conf.settings import AUTOMATIC_OCR
from documents.conf.settings import UNCOMPRESS_COMPRESSED_LOCAL_FILES from documents.conf.settings import UNCOMPRESS_COMPRESSED_LOCAL_FILES
from documents.conf.settings import UNCOMPRESS_COMPRESSED_STAGING_FILES from documents.conf.settings import UNCOMPRESS_COMPRESSED_STAGING_FILES
from filesystem_serving.conf.settings import FILESERVING_ENABLE
from documents import PERMISSION_DOCUMENT_CREATE, \ from documents import PERMISSION_DOCUMENT_CREATE, \
PERMISSION_DOCUMENT_CREATE, PERMISSION_DOCUMENT_PROPERTIES_EDIT, \ PERMISSION_DOCUMENT_CREATE, PERMISSION_DOCUMENT_PROPERTIES_EDIT, \
PERMISSION_DOCUMENT_METADATA_EDIT, PERMISSION_DOCUMENT_VIEW, \ PERMISSION_DOCUMENT_METADATA_EDIT, PERMISSION_DOCUMENT_VIEW, \
PERMISSION_DOCUMENT_DELETE, PERMISSION_DOCUMENT_DOWNLOAD, \ PERMISSION_DOCUMENT_DELETE, PERMISSION_DOCUMENT_DOWNLOAD, \
PERMISSION_DOCUMENT_TRANSFORM, PERMISSION_DOCUMENT_TOOLS PERMISSION_DOCUMENT_TRANSFORM, PERMISSION_DOCUMENT_TOOLS
from utils import save_metadata, save_metadata_list, decode_metadata_from_url from forms import DocumentTypeSelectForm, DocumentCreateWizard, \
MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \
StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \
MetadataFormSet, DocumentPageForm, DocumentPageTransformationForm
from metadata import save_metadata, save_metadata_list, decode_metadata_from_url
from models import Document, DocumentMetadata, DocumentType, MetadataType, \
DocumentPage, DocumentPageTransformation
from staging import StagingFile
from utils import document_save_to_temp_dir
def document_list(request): def document_list(request):
permissions = [PERMISSION_DOCUMENT_VIEW] permissions = [PERMISSION_DOCUMENT_VIEW]
@@ -503,10 +498,11 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
if filepath: if filepath:
return serve_file(request, File(file=open(filepath, 'r'))) return serve_file(request, File(file=open(filepath, 'r')))
#Save to a temporary location #Save to a temporary location
filepath = from_descriptor_to_tempfile(document.open(), document.checksum) filepath = document_save_to_temp_dir(document, filename=document.checksum)
output_file = convert(filepath, size=size, format='jpg', quality=quality, extra_options=tranformation_string, page=page-1) output_file = convert(filepath, size=size, format='jpg', quality=quality, extra_options=tranformation_string, page=page-1)
return serve_file(request, File(file=open(output_file, 'r')), content_type='image/jpeg') return serve_file(request, File(file=open(output_file, 'r')), content_type='image/jpeg')
except Exception, e: except Exception, e:
#messages.error(request, e)
if size == THUMBNAIL_SIZE: if size == THUMBNAIL_SIZE:
return serve_file(request, File(file=open('%simages/picture_error.png' % settings.MEDIA_ROOT, 'r'))) return serve_file(request, File(file=open('%simages/picture_error.png' % settings.MEDIA_ROOT, 'r')))
else: else:

View File

@@ -1,10 +1,11 @@
import os
import types import types
from django.shortcuts import render_to_response from django.shortcuts import render_to_response
from django.template import RequestContext from django.template import RequestContext
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from common.utils import exists_with_famfam
from common.conf import settings as common_settings from common.conf import settings as common_settings
from documents.conf import settings as documents_settings from documents.conf import settings as documents_settings
from converter.conf import settings as converter_settings from converter.conf import settings as converter_settings
@@ -68,7 +69,7 @@ def check_settings(request):
'extra_columns':[ 'extra_columns':[
{'name':_(u'name'), 'attribute':'name'}, {'name':_(u'name'), 'attribute':'name'},
{'name':_(u'value'), 'attribute': lambda x: _return_type(x['value'])}, {'name':_(u'value'), 'attribute': lambda x: _return_type(x['value'])},
{'name':_(u'exists'), 'attribute':lambda x: _exists(x['value']) if 'exists' in x else ''}, {'name':_(u'exists'), 'attribute':lambda x: exists_with_famfam(x['value']) if 'exists' in x else ''},
] ]
} }
@@ -88,15 +89,6 @@ def _return_type(value):
else: else:
return value return value
def _exists(path):
try:
if os.path.exists(path):
return '<span class="famfam active famfam-tick"></span>'
else:
return '<span class="famfam active famfam-cross"></span>'
except Exception, exc:
return exc
def blank_menu(request): def blank_menu(request):
return render_to_response('generic_template.html', { return render_to_response('generic_template.html', {

View File

@@ -8,7 +8,7 @@ import tempfile
from django.utils.translation import ugettext as _ from django.utils.translation import ugettext as _
from django.contrib import messages from django.contrib import messages
from common.conf.settings import TEMPORARY_DIRECTORY from common import TEMPORARY_DIRECTORY
from documents.models import Document from documents.models import Document

View File

@@ -0,0 +1,95 @@
import os
from django.core.files.storage import Storage
from django.utils.encoding import force_unicode
from pymongo import Connection
from gridfs import GridFS
# Connection parameters read by GridFSStorage.__init__: HOST and PORT are
# passed to the pymongo Connection, and DATABASE_NAME selects the database
# that is handed to GridFS.
HOST = u'localhost'
PORT = 27017
DATABASE_NAME = u'document_storage'
class GridFSStorage(Storage):
    """Django storage backend that keeps file contents in MongoDB GridFS.

    Files are addressed by name.  Reads and deletes act on the most recent
    GridFS version stored under that name.
    """

    def __init__(self, *args, **kwargs):
        # Positional and keyword arguments are accepted but not used; the
        # connection is configured from the module-level constants.
        self.db = Connection(host=HOST, port=PORT)[DATABASE_NAME]
        self.fs = GridFS(self.db)

    def save(self, name, content):
        """Store ``content`` in GridFS under ``name`` and return ``name``.

        Content exposing ``temporary_file_path()`` is imported from that
        path with ``move`` and then closed; any other content is streamed
        chunk by chunk.  Errors raised while writing propagate to the caller.
        """
        #TODO: if exists adding _ plus a counter
        if hasattr(content, 'temporary_file_path'):
            # This file has a file path that we can move.
            self.move(content.temporary_file_path(), name)
            content.close()
        else:
            # This is a normal uploadedfile that we can stream.
            newfile = self.fs.new_file(filename=name)
            try:
                for chunk in content.chunks():
                    newfile.write(chunk)
            finally:
                newfile.close()
        return name

    def open(self, name, *args, **kwargs):
        """Return the most recent GridFS file stored under ``name``.

        Extra arguments (such as a mode) are accepted and ignored.
        """
        return self.fs.get_last_version(name)

    def delete(self, name):
        """Delete the most recent GridFS file stored under ``name``.

        Returns ``True`` once the file has been removed.
        """
        grid_file = self.fs.get_last_version(name)
        # GridFS.delete expects the file's _id.  Calling self.delete here
        # instead would recurse without end.
        self.fs.delete(grid_file._id)
        return True

    def exists(self, name):
        """Tell whether a GridFS file named ``name`` is stored."""
        return self.fs.exists(filename=name)

    def path(self, name):
        """Return ``name`` as unicode; GridFS files have no filesystem path."""
        return force_unicode(name)

    def move(self, old_file_name, name, chunk_size=1024*64):
        """Copy the local file ``old_file_name`` into GridFS as ``name``.

        The source is read ``chunk_size`` bytes at a time and removed from
        the local filesystem once the copy has completed.
        """
        # first open the old file, so that it won't go away
        old_file = open(old_file_name, 'rb')
        try:
            newfile = self.fs.new_file(filename=name)
            try:
                while True:
                    current_chunk = old_file.read(chunk_size)
                    # An empty read marks end of file, whether the file
                    # object yields str or bytes.
                    if not current_chunk:
                        break
                    newfile.write(current_chunk)
            finally:
                newfile.close()
        finally:
            old_file.close()

        try:
            os.remove(old_file_name)
        except OSError as e:
            # Certain operating systems (Cygwin and Windows)
            # fail when deleting opened files, ignore it. (For the
            # systems where this happens, temporary files will be auto-deleted
            # on close anyway.)
            if getattr(e, 'winerror', 0) != 32 and getattr(e, 'errno', 0) != 13:
                raise