diff --git a/apps/common/__init__.py b/apps/common/__init__.py
index e69de29bb2..9562a07710 100755
--- a/apps/common/__init__.py
+++ b/apps/common/__init__.py
@@ -0,0 +1,5 @@
+import tempfile
+
+from common.conf import settings as common_settings
+
+# Honour the configured temporary directory; when that setting is empty a
+# new directory is created with tempfile.mkdtemp() as this package is imported.
+TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY or tempfile.mkdtemp()
diff --git a/apps/common/utils.py b/apps/common/utils.py
index 8db26008d8..21f3a6d777 100755
--- a/apps/common/utils.py
+++ b/apps/common/utils.py
@@ -1,3 +1,4 @@
+import os
import types
from django.utils.http import urlquote as django_urlquote
@@ -75,3 +76,13 @@ def pretty_size(size):
continue
else:
return round(size/float(lim/2**10),2).__str__()+suf
+
+
+def exists_with_famfam(path):
+    """
+    Report whether ``path`` exists on the local filesystem.
+
+    Returns one marker string when the path exists and another when it
+    does not; if the check itself raises, the exception instance is
+    returned instead of being propagated.
+
+    NOTE(review): both marker strings are empty here although the name
+    suggests icon markup -- confirm the literals are complete.
+    """
+    try:
+        found = os.path.exists(path)
+    except Exception, exc:
+        #Hand the error back to the caller as the value to display
+        return exc
+
+    if found:
+        return ''
+    return ''
diff --git a/apps/converter/__init__.py b/apps/converter/__init__.py
index 8111257f27..9018f28a9a 100755
--- a/apps/converter/__init__.py
+++ b/apps/converter/__init__.py
@@ -1,16 +1,3 @@
-import tempfile
-
-from common.conf import settings as common_settings
-
-TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
-
-#ugettext = lambda s: s
-
-#TRANFORMATION_ROTATE = (u'-rotate %(degrees)d', ugettext(u'Rotation, arguments: degrees'))
TRANFORMATION_CHOICES = {
'rotate':'-rotate %(degrees)d'
}
-
-#getattr(settings, 'CONVERTER_TRANSFORMATION_LIST', [
-# TRANFORMATION_ROTATE,
-# ])
diff --git a/apps/converter/api.py b/apps/converter/api.py
index 1ecefa5030..933962446d 100755
--- a/apps/converter/api.py
+++ b/apps/converter/api.py
@@ -15,10 +15,9 @@ from converter.conf.settings import LOW_QUALITY_OPTIONS
from converter.conf.settings import HIGH_QUALITY_OPTIONS
#from converter.conf.settings import UNOCONV_PATH
-
-from converter import TEMPORARY_DIRECTORY, TRANFORMATION_CHOICES
-from utils import from_descriptor_to_tempfile
-
+from common import TEMPORARY_DIRECTORY
+from converter import TRANFORMATION_CHOICES
+from documents.utils import document_save_to_temp_dir
QUALITY_DEFAULT = 'quality_default'
QUALITY_LOW = 'quality_low'
@@ -175,10 +174,8 @@ def get_page_count(input_filepath):
#TODO: slugify OCR_OPTIONS and add to file name to cache
def convert_document_for_ocr(document, page=0, format='tif'):
#Extract document file
- document.file.open()
- desc = document.file.storage.open(document.file.path)
- input_filepath = from_descriptor_to_tempfile(desc, document.uuid)
-
+ input_filepath = document_save_to_temp_dir(document, document.uuid)
+
#Convert for OCR
temp_filename, separator = os.path.splitext(os.path.basename(input_filepath))
temp_path = os.path.join(TEMPORARY_DIRECTORY, temp_filename)
diff --git a/apps/converter/utils.py b/apps/converter/utils.py
index ee50a701d2..2f125733f7 100644
--- a/apps/converter/utils.py
+++ b/apps/converter/utils.py
@@ -1,7 +1,7 @@
import os
import tempfile
-from converter import TEMPORARY_DIRECTORY
+from common import TEMPORARY_DIRECTORY
#http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python
def copyfile(source, dest, buffer_size=1024*1024):
@@ -24,36 +24,3 @@ def copyfile(source, dest, buffer_size=1024*1024):
source.close()
dest.close()
-
-
-def from_descriptor_to_tempfile(input_descriptor, filename, buffer_size=1024*1024):
- path = os.path.join(TEMPORARY_DIRECTORY, filename)
-
- output_descriptor = open(path, 'wb')
-
- while 1:
- copy_buffer = input_descriptor.read(buffer_size)
- if copy_buffer:
- output_descriptor.write(copy_buffer)
- else:
- break
-
- input_descriptor.close()
- output_descriptor.close()
- return path
-
-
-def from_descriptor_to_new_tempfile(input_descriptor, buffer_size=1024*1024):
- output_descriptor, tmp_filename = tempfile.mkstemp()
-
- while 1:
- copy_buffer = input_descriptor.read(buffer_size)
- if copy_buffer:
- #output_descriptor.write(copy_buffer)
- os.write(output_descriptor, copy_buffer)
- else:
- break
-
- input_descriptor.close()
- os.close(output_descriptor)
- return tmp_filename
diff --git a/apps/documents/__init__.py b/apps/documents/__init__.py
index 7b42534141..db0c200332 100755
--- a/apps/documents/__init__.py
+++ b/apps/documents/__init__.py
@@ -13,7 +13,6 @@ from models import Document, DocumentPage, DocumentPageTransformation
from staging import StagingFile
from common.conf import settings as common_settings
-
from conf.settings import ENABLE_SINGLE_DOCUMENT_UPLOAD
PERMISSION_DOCUMENT_CREATE = 'document_create'
@@ -75,6 +74,18 @@ register_links(['document_page_view', 'document_page_transformation_edit', 'docu
register_links(StagingFile, [staging_file_preview, staging_file_delete])
+
+
+def document_exists(document):
+    """
+    Report whether ``document.exists()`` is true.
+
+    Returns one marker string for an existing document and another for a
+    missing one; if ``exists()`` raises, the exception instance is
+    returned instead of being propagated.
+
+    NOTE(review): both marker strings are empty here -- confirm the
+    literals are complete.
+    """
+    try:
+        present = document.exists()
+    except Exception, exc:
+        #Hand the error back to the caller as the value to display
+        return exc
+
+    if present:
+        return ''
+    return ''
+
+
register_model_list_columns(Document, [
{'name':_(u'thumbnail'), 'attribute':
lambda x: '
' % (reverse('document_preview', args=[x.id]),
@@ -83,7 +94,9 @@ register_model_list_columns(Document, [
{'name':_(u'metadata'), 'attribute':
lambda x: ', '.join(['%s - %s' %(metadata.metadata_type, metadata.value) for metadata in x.documentmetadata_set.all()])
},
-
+ {'name':_(u'exists'), 'attribute':
+ lambda x: document_exists(x)
+ },
])
if ENABLE_SINGLE_DOCUMENT_UPLOAD:
@@ -96,5 +109,3 @@ else:
{'text':_('documents'), 'view':'document_create_multiple', 'links':[
document_create_multiple, document_list
],'famfam':'page','position':1}])
-
-TEMPORARY_DIRECTORY = common_settings.TEMPORARY_DIRECTORY if common_settings.TEMPORARY_DIRECTORY else tempfile.mkdtemp()
diff --git a/apps/documents/metadata.py b/apps/documents/metadata.py
new file mode 100644
index 0000000000..08f63008aa
--- /dev/null
+++ b/apps/documents/metadata.py
@@ -0,0 +1,54 @@
+from urllib import unquote_plus
+
+from django.shortcuts import get_object_or_404
+from django.core.exceptions import ObjectDoesNotExist
+
+
+from models import DocumentMetadata, MetadataType
+
+def decode_metadata_from_url(url_dict):
+    """
+    Pair up metadata type ids and values passed as request parameters.
+
+    Recognized keys start with ``metadata`` followed by an index, an
+    underscore and either ``id`` or ``value`` (for example ``metadata0_id``
+    and ``metadata0_value``).  Returns a list of
+    ``{'id': ..., 'value': ...}`` dictionaries, one for every index that
+    supplied both an id and a value.  Keys that do not follow this naming
+    scheme are ignored rather than raising.
+    """
+    prefix = 'metadata'
+    metadata_dict = {
+        'id': {},
+        'value': {},
+    }
+
+    #Match out of order metadata_type ids with metadata values from request
+    for key, value in url_dict.items():
+        if not key.startswith(prefix):
+            continue
+
+        parts = key[len(prefix):].split('_')
+        if len(parts) != 2 or parts[1] not in metadata_dict:
+            #Unrelated or malformed parameter; skip it instead of failing
+            continue
+
+        index, element = parts
+        metadata_dict[element][index] = value
+
+    #Convert the nested dictionary into a list of id+values dictionaries
+    values = metadata_dict['value']
+    return [
+        {'id': type_id, 'value': values[index]}
+        for index, type_id in metadata_dict['id'].items()
+        if index in values
+    ]
+
+
+def save_metadata_list(metadata_list, document):
+    """
+    Apply every entry of ``metadata_list`` to ``document``.
+
+    An entry with a non-empty ``'value'`` is stored through save_metadata;
+    an entry with an empty value deletes the matching DocumentMetadata
+    row, silently doing nothing when the metadata type or the row cannot
+    be found.
+    """
+    for entry in metadata_list:
+        if not entry['value']:
+            #Empty value: remove the stored metadata if there is any
+            try:
+                DocumentMetadata.objects.get(
+                    document=document,
+                    metadata_type=MetadataType.objects.get(id=entry['id']),
+                ).delete()
+            except ObjectDoesNotExist:
+                pass
+            continue
+
+        save_metadata(entry, document)
+
+
+def save_metadata(metadata_dict, document):
+    """
+    Create or update the DocumentMetadata of ``document`` for the metadata
+    type whose primary key is ``metadata_dict['id']`` and store the
+    url-unquoted ``metadata_dict['value']`` in it.
+
+    The metadata type is looked up with get_object_or_404.
+    """
+    metadata_type = get_object_or_404(MetadataType, pk=metadata_dict['id'])
+
+    #Use matched metadata now to create document metadata
+    document_metadata, _created = DocumentMetadata.objects.get_or_create(
+        document=document,
+        metadata_type=metadata_type,
+    )
+
+    #Handle 'plus sign as space' in the url
+    #unquote_plus handles utf-8?!?
+    #http://stackoverflow.com/questions/4382875/handling-iri-in-django
+    document_metadata.value = unquote_plus(metadata_dict['value'])#.decode('utf-8')
+    document_metadata.save()
diff --git a/apps/documents/models.py b/apps/documents/models.py
index 867cf84a8d..50091df2e0 100755
--- a/apps/documents/models.py
+++ b/apps/documents/models.py
@@ -19,7 +19,6 @@ from documents.conf.settings import STORAGE_BACKEND
from documents.conf.settings import AVAILABLE_TRANSFORMATIONS
from documents.conf.settings import DEFAULT_TRANSFORMATIONS
-
def get_filename_from_uuid(instance, filename):
filename, extension = os.path.splitext(filename)
instance.file_filename = filename
@@ -80,9 +79,9 @@ class Document(models.Model):
def update_mimetype(self, save=True):
try:
mime = magic.Magic(mime=True)
- self.file_mimetype = mime.from_buffer(self.read())
+ self.file_mimetype = mime.from_buffer(self.open().read())
mime_encoding = magic.Magic(mime_encoding=True)
- self.file_mime_encoding = mime_encoding.from_buffer(self.read())
+ self.file_mime_encoding = mime_encoding.from_buffer(self.open().read())
except:
self.file_mimetype = u'unknown'
self.file_mime_encoding = u'unknown'
@@ -119,8 +118,8 @@ class Document(models.Model):
def save_to_file(self, filepath, buffer_size=1024*1024):
input_descriptor = self.open()
output_descriptor = open(filepath, 'wb')
- while 1:
- copy_buffer = input_descriptor.read()
+ while True:
+ copy_buffer = input_descriptor.read(buffer_size)
if copy_buffer:
output_descriptor.write(copy_buffer)
else:
@@ -129,7 +128,7 @@ class Document(models.Model):
output_descriptor.close()
return filepath
-
+
def exists(self):
return self.file.storage.exists(self.file.path)
diff --git a/apps/documents/utils.py b/apps/documents/utils.py
index 08e9e11b30..9e4fa81848 100755
--- a/apps/documents/utils.py
+++ b/apps/documents/utils.py
@@ -1,14 +1,9 @@
import os
import tempfile
-from urllib import unquote_plus
-
-from django.shortcuts import get_object_or_404
-from django.core.exceptions import ObjectDoesNotExist
-from documents import TEMPORARY_DIRECTORY
+from common import TEMPORARY_DIRECTORY
-from models import Document, DocumentMetadata, MetadataType
#http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python
def copyfile(source, dest, buffer_size=1024*1024):
@@ -22,7 +17,7 @@ def copyfile(source, dest, buffer_size=1024*1024):
if not hasattr(dest, 'write'):
dest = open(dest, 'wb')
- while 1:
+ while True:
copy_buffer = source.read(buffer_size)
if copy_buffer:
dest.write(copy_buffer)
@@ -33,82 +28,8 @@ def copyfile(source, dest, buffer_size=1024*1024):
dest.close()
-def from_descriptor_to_tempfile(input_descriptor, filename, buffer_size=1024*1024):
- path = os.path.join(TEMPORARY_DIRECTORY, filename)
-
- output_descriptor = open(path, 'wb')
-
- while 1:
- copy_buffer = input_descriptor.read(buffer_size)
- if copy_buffer:
- output_descriptor.write(copy_buffer)
- else:
- break
-
- input_descriptor.close()
- output_descriptor.close()
- return path
+def document_save_to_temp_dir(document, filename, buffer_size=1024*1024):
+    """
+    Write ``document`` into TEMPORARY_DIRECTORY under ``filename`` by
+    calling ``document.save_to_file`` and return that call's result.
+    """
+    return document.save_to_file(
+        os.path.join(TEMPORARY_DIRECTORY, filename),
+        buffer_size
+    )
-def from_descriptor_to_new_tempfile(input_descriptor, buffer_size=1024*1024):
- output_descriptor, tmp_filename = tempfile.mkstemp()
-
- while 1:
- copy_buffer = input_descriptor.read(buffer_size)
- if copy_buffer:
- #output_descriptor.write(copy_buffer)
- os.write(output_descriptor, copy_buffer)
- else:
- break
-
- input_descriptor.close()
- os.close(output_descriptor)
- return tmp_filename
-
-
-def decode_metadata_from_url(url_dict):
- metadata_dict = {
- 'id':{},
- 'value':{}
- }
- metadata_list = []
- #Match out of order metadata_type ids with metadata values from request
- for key, value in url_dict.items():
- if 'metadata' in key:
- index, element = key[8:].split('_')
- metadata_dict[element][index] = value
-
- #Convert the nested dictionary into a list of id+values dictionaries
- for order, id in metadata_dict['id'].items():
- if order in metadata_dict['value'].keys():
- metadata_list.append({'id':id, 'value':metadata_dict['value'][order]})
-
- return metadata_list
-
-
-def save_metadata_list(metadata_list, document):
- for item in metadata_list:
- if item['value']:
- save_metadata(item, document)
- else:
- try:
- metadata_type = MetadataType.objects.get(id=item['id'])
- document_metadata = DocumentMetadata.objects.get(document=document,
- metadata_type=metadata_type)
- document_metadata.delete()
- except ObjectDoesNotExist:
- pass
-
-
-def save_metadata(metadata_dict, document):
- #Use matched metadata now to create document metadata
- document_metadata, created = DocumentMetadata.objects.get_or_create(
- document=document,
- metadata_type=get_object_or_404(MetadataType, pk=metadata_dict['id']),
- )
- #Handle 'plus sign as space' in the url
-
- #unquote_plus handles utf-8?!?
- #http://stackoverflow.com/questions/4382875/handling-iri-in-django
- document_metadata.value=unquote_plus(metadata_dict['value'])#.decode('utf-8')
- document_metadata.save()
diff --git a/apps/documents/views.py b/apps/documents/views.py
index fe887c8f60..a9bb93e6f9 100755
--- a/apps/documents/views.py
+++ b/apps/documents/views.py
@@ -16,26 +16,14 @@ from django.core.exceptions import ObjectDoesNotExist
from django.core.files.uploadedfile import SimpleUploadedFile
from common.utils import pretty_size
-from permissions.api import check_permissions, Unauthorized
-from filetransfers.api import serve_file
from converter.api import convert, in_image_cache, QUALITY_DEFAULT
from converter import TRANFORMATION_CHOICES
+from filetransfers.api import serve_file
from filesystem_serving.api import document_create_fs_links, document_delete_fs_links
-
-
-from utils import from_descriptor_to_tempfile
-
-from models import Document, DocumentMetadata, DocumentType, MetadataType, \
- DocumentPage, DocumentPageTransformation
-
-from forms import DocumentTypeSelectForm, DocumentCreateWizard, \
- MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \
- StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \
- MetadataFormSet, DocumentPageForm, DocumentPageTransformationForm
-
-from staging import StagingFile
-
+from filesystem_serving.conf.settings import FILESERVING_ENABLE
from ocr.models import add_document_to_queue
+from permissions.api import check_permissions, Unauthorized
+
from documents.conf.settings import DELETE_STAGING_FILE_AFTER_UPLOAD
from documents.conf.settings import USE_STAGING_DIRECTORY
@@ -49,16 +37,23 @@ from documents.conf.settings import AUTOMATIC_OCR
from documents.conf.settings import UNCOMPRESS_COMPRESSED_LOCAL_FILES
from documents.conf.settings import UNCOMPRESS_COMPRESSED_STAGING_FILES
-from filesystem_serving.conf.settings import FILESERVING_ENABLE
-
-
from documents import PERMISSION_DOCUMENT_CREATE, \
PERMISSION_DOCUMENT_CREATE, PERMISSION_DOCUMENT_PROPERTIES_EDIT, \
PERMISSION_DOCUMENT_METADATA_EDIT, PERMISSION_DOCUMENT_VIEW, \
PERMISSION_DOCUMENT_DELETE, PERMISSION_DOCUMENT_DOWNLOAD, \
PERMISSION_DOCUMENT_TRANSFORM, PERMISSION_DOCUMENT_TOOLS
+
+from forms import DocumentTypeSelectForm, DocumentCreateWizard, \
+ MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \
+ StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \
+ MetadataFormSet, DocumentPageForm, DocumentPageTransformationForm
-from utils import save_metadata, save_metadata_list, decode_metadata_from_url
+from metadata import save_metadata, save_metadata_list, decode_metadata_from_url
+from models import Document, DocumentMetadata, DocumentType, MetadataType, \
+ DocumentPage, DocumentPageTransformation
+from staging import StagingFile
+from utils import document_save_to_temp_dir
+
def document_list(request):
permissions = [PERMISSION_DOCUMENT_VIEW]
@@ -249,7 +244,7 @@ def upload_document_with_type(request, document_type_id, multiple=True):
return render_to_response('generic_form.html', context,
context_instance=RequestContext(request))
-
+
def document_view(request, document_id):
permissions = [PERMISSION_DOCUMENT_VIEW]
try:
@@ -503,10 +498,11 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_
if filepath:
return serve_file(request, File(file=open(filepath, 'r')))
#Save to a temporary location
- filepath = from_descriptor_to_tempfile(document.open(), document.checksum)
+ filepath = document_save_to_temp_dir(document, filename=document.checksum)
output_file = convert(filepath, size=size, format='jpg', quality=quality, extra_options=tranformation_string, page=page-1)
return serve_file(request, File(file=open(output_file, 'r')), content_type='image/jpeg')
except Exception, e:
+ #messages.error(request, e)
if size == THUMBNAIL_SIZE:
return serve_file(request, File(file=open('%simages/picture_error.png' % settings.MEDIA_ROOT, 'r')))
else:
diff --git a/apps/main/views.py b/apps/main/views.py
index 76333be0e8..62c9fc14dd 100755
--- a/apps/main/views.py
+++ b/apps/main/views.py
@@ -1,10 +1,11 @@
-import os
import types
from django.shortcuts import render_to_response
from django.template import RequestContext
from django.utils.translation import ugettext_lazy as _
+from common.utils import exists_with_famfam
+
from common.conf import settings as common_settings
from documents.conf import settings as documents_settings
from converter.conf import settings as converter_settings
@@ -68,7 +69,7 @@ def check_settings(request):
'extra_columns':[
{'name':_(u'name'), 'attribute':'name'},
{'name':_(u'value'), 'attribute': lambda x: _return_type(x['value'])},
- {'name':_(u'exists'), 'attribute':lambda x: _exists(x['value']) if 'exists' in x else ''},
+ {'name':_(u'exists'), 'attribute':lambda x: exists_with_famfam(x['value']) if 'exists' in x else ''},
]
}
@@ -88,15 +89,6 @@ def _return_type(value):
else:
return value
-def _exists(path):
- try:
- if os.path.exists(path):
- return ''
- else:
- return ''
- except Exception, exc:
- return exc
-
def blank_menu(request):
return render_to_response('generic_template.html', {
diff --git a/apps/ocr/api.py b/apps/ocr/api.py
index f244b45ae5..86338de21f 100755
--- a/apps/ocr/api.py
+++ b/apps/ocr/api.py
@@ -8,7 +8,7 @@ import tempfile
from django.utils.translation import ugettext as _
from django.contrib import messages
-from common.conf.settings import TEMPORARY_DIRECTORY
+from common import TEMPORARY_DIRECTORY
from documents.models import Document
diff --git a/apps/storage/backends/gridfsstorage.py b/apps/storage/backends/gridfsstorage.py
new file mode 100644
index 0000000000..c6923c342c
--- /dev/null
+++ b/apps/storage/backends/gridfsstorage.py
@@ -0,0 +1,95 @@
+import os
+
+from django.core.files.storage import Storage
+from django.utils.encoding import force_unicode
+
+from pymongo import Connection
+from gridfs import GridFS
+
+HOST = u'localhost'
+PORT = 27017
+DATABASE_NAME = u'document_storage'
+
+class GridFSStorage(Storage):
+    """
+    Django storage backend that keeps file contents in MongoDB GridFS.
+
+    The connection is made to HOST:PORT and the DATABASE_NAME database is
+    used; all three are module-level constants.
+    """
+    def __init__(self, *args, **kwargs):
+        self.db = Connection(host=HOST, port=PORT)[DATABASE_NAME]
+        self.fs = GridFS(self.db)
+
+    def save(self, name, content):
+        """
+        Store ``content`` in GridFS under ``name`` and return ``name``.
+
+        Content exposing ``temporary_file_path`` is imported from that
+        path with move(); anything else is streamed chunk by chunk.
+        Errors are propagated to the caller.
+        """
+        #TODO: if exists adding _ plus a counter
+        if hasattr(content, 'temporary_file_path'):
+            # This file has a file path that we can move.
+            self.move(content.temporary_file_path(), name)
+            content.close()
+        else:
+            # This is a normal uploadedfile that we can stream.
+            newfile = self.fs.new_file(filename=name)
+            try:
+                for chunk in content.chunks():
+                    newfile.write(chunk)
+            finally:
+                newfile.close()
+
+        return name
+
+    def open(self, name, *args, **kwargs):
+        """Return the most recent GridFS version stored under ``name``."""
+        return self.fs.get_last_version(name)
+
+    def delete(self, name):
+        """
+        Delete the most recent GridFS version stored under ``name``.
+
+        Always returns True; errors from the lookup are propagated.
+        """
+        # GridFS.delete expects the file id, not the file object, and must
+        # be called on self.fs -- calling self.delete here would recurse
+        # without end.
+        stored_file = self.fs.get_last_version(name)
+        self.fs.delete(stored_file._id)
+        return True
+
+    def exists(self, name):
+        """Tell whether GridFS holds a file named ``name``."""
+        return self.fs.exists(filename=name)
+
+    def path(self, name):
+        """Return ``name`` as unicode; no local filesystem path is built."""
+        return force_unicode(name)
+
+    def move(self, old_file_name, name, chunk_size=1024*64):
+        """
+        Copy the local file ``old_file_name`` into GridFS as ``name``,
+        reading ``chunk_size`` bytes at a time, then remove the local file.
+        """
+        # first open the old file, so that it won't go away
+        old_file = open(old_file_name, 'rb')
+        try:
+            newfile = self.fs.new_file(filename=name)
+            try:
+                while True:
+                    current_chunk = old_file.read(chunk_size)
+                    if not current_chunk:
+                        break
+                    newfile.write(current_chunk)
+            finally:
+                newfile.close()
+        finally:
+            old_file.close()
+
+        try:
+            os.remove(old_file_name)
+        except OSError, e:
+            # Certain operating systems (Cygwin and Windows)
+            # fail when deleting opened files, ignore it. (For the
+            # systems where this happens, temporary files will be auto-deleted
+            # on close anyway.)
+            if getattr(e, 'winerror', 0) != 32 and getattr(e, 'errno', 0) != 13:
+                raise
+