def exists_with_famfam(path):
    """Return a display value telling whether ``path`` exists on disk.

    Returns one string when ``os.path.exists(path)`` is true and another
    when it is false.  If the check itself raises, the exception instance
    is returned (not raised) so the caller can display it.
    """
    # NOTE(review): both result strings are empty in this view of the
    # source; presumably the original markup was lost -- confirm.
    try:
        if os.path.exists(path):
            return ''
        else:
            return ''
    except Exception as exc:
        # "except ... as" is valid on Python 2.6+ and 3.x; the older comma
        # form is a SyntaxError on Python 3.
        return exc
TEMPORARY_DIRECTORY +from converter import TRANFORMATION_CHOICES +from documents.utils import document_save_to_temp_dir QUALITY_DEFAULT = 'quality_default' QUALITY_LOW = 'quality_low' @@ -175,10 +174,8 @@ def get_page_count(input_filepath): #TODO: slugify OCR_OPTIONS and add to file name to cache def convert_document_for_ocr(document, page=0, format='tif'): #Extract document file - document.file.open() - desc = document.file.storage.open(document.file.path) - input_filepath = from_descriptor_to_tempfile(desc, document.uuid) - + input_filepath = document_save_to_temp_dir(document, document.uuid) + #Convert for OCR temp_filename, separator = os.path.splitext(os.path.basename(input_filepath)) temp_path = os.path.join(TEMPORARY_DIRECTORY, temp_filename) diff --git a/apps/converter/utils.py b/apps/converter/utils.py index ee50a701d2..2f125733f7 100644 --- a/apps/converter/utils.py +++ b/apps/converter/utils.py @@ -1,7 +1,7 @@ import os import tempfile -from converter import TEMPORARY_DIRECTORY +from common import TEMPORARY_DIRECTORY #http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python def copyfile(source, dest, buffer_size=1024*1024): @@ -24,36 +24,3 @@ def copyfile(source, dest, buffer_size=1024*1024): source.close() dest.close() - - -def from_descriptor_to_tempfile(input_descriptor, filename, buffer_size=1024*1024): - path = os.path.join(TEMPORARY_DIRECTORY, filename) - - output_descriptor = open(path, 'wb') - - while 1: - copy_buffer = input_descriptor.read(buffer_size) - if copy_buffer: - output_descriptor.write(copy_buffer) - else: - break - - input_descriptor.close() - output_descriptor.close() - return path - - -def from_descriptor_to_new_tempfile(input_descriptor, buffer_size=1024*1024): - output_descriptor, tmp_filename = tempfile.mkstemp() - - while 1: - copy_buffer = input_descriptor.read(buffer_size) - if copy_buffer: - #output_descriptor.write(copy_buffer) - os.write(output_descriptor, copy_buffer) - else: - break - - 
def document_exists(document):
    """Return a display value telling whether ``document``'s file exists.

    Calls ``document.exists()`` and returns one string for a true result
    and another for a false one.  Any exception raised by the check is
    returned (not raised) so it can be shown in the list column.
    """
    # NOTE(review): both result strings are empty in this view of the
    # source; presumably the original markup was lost -- confirm.
    try:
        if document.exists():
            return ''
        else:
            return ''
    except Exception as exc:
        # "except ... as" is valid on Python 2.6+ and 3.x; the older comma
        # form is a SyntaxError on Python 3.
        return exc
def decode_metadata_from_url(url_dict):
    """Rebuild the metadata id/value pairs encoded in a request dictionary.

    Keys of the form ``metadata<index>_id`` and ``metadata<index>_value``
    are matched up by ``<index>``, whatever order they arrive in.

    Returns a list of ``{'id': ..., 'value': ...}`` dictionaries, one per
    index that has both an id and a value.  Keys that do not start with
    ``metadata`` or do not end in ``_id`` / ``_value`` are ignored instead
    of raising ``ValueError`` / ``KeyError``.
    """
    prefix = 'metadata'
    metadata_dict = {
        'id': {},
        'value': {}
    }

    #Match out of order metadata_type ids with metadata values from request
    for key, value in url_dict.items():
        if not key.startswith(prefix):
            continue
        index, separator, element = key[len(prefix):].partition('_')
        if not separator or element not in metadata_dict:
            continue
        metadata_dict[element][index] = value

    #Convert the nested dictionary into a list of id+values dictionaries
    return [
        {'id': metadata_id, 'value': metadata_dict['value'][order]}
        for order, metadata_id in metadata_dict['id'].items()
        if order in metadata_dict['value']
    ]


def save_metadata_list(metadata_list, document):
    """Store or remove ``document`` metadata for every entry of the list.

    Entries with a non-empty ``'value'`` are saved through
    ``save_metadata``; entries with an empty value delete the matching
    ``DocumentMetadata`` row, if the metadata type and the row both exist.
    """
    for item in metadata_list:
        if item['value']:
            save_metadata(item, document)
            continue

        try:
            metadata_type = MetadataType.objects.get(id=item['id'])
            document_metadata = DocumentMetadata.objects.get(
                document=document, metadata_type=metadata_type)
            document_metadata.delete()
        except ObjectDoesNotExist:
            # Nothing stored for this type: there is nothing to remove.
            pass


def save_metadata(metadata_dict, document):
    """Create or update one ``DocumentMetadata`` entry for ``document``.

    ``metadata_dict`` must carry the metadata type primary key under
    ``'id'`` and the url-quoted value under ``'value'``.  The type lookup
    goes through ``get_object_or_404``.
    """
    #Use matched metadata now to create document metadata
    document_metadata = DocumentMetadata.objects.get_or_create(
        document=document,
        metadata_type=get_object_or_404(MetadataType, pk=metadata_dict['id']),
    )[0]

    #Handle 'plus sign as space' in the url
    #NOTE(review): whether unquote_plus copes with utf-8 input is an open
    #question left by the original author, see
    #http://stackoverflow.com/questions/4382875/handling-iri-in-django
    document_metadata.value = unquote_plus(metadata_dict['value'])
    document_metadata.save()
def document_save_to_temp_dir(document, filename, buffer_size=1024*1024):
    """Write ``document`` into the temporary directory as ``filename``.

    The target path is ``TEMPORARY_DIRECTORY`` joined with ``filename``;
    the copy itself is done by ``document.save_to_file`` and whatever that
    method returns is handed back to the caller.
    """
    return document.save_to_file(
        os.path.join(TEMPORARY_DIRECTORY, filename),
        buffer_size
    )
order, id in metadata_dict['id'].items(): - if order in metadata_dict['value'].keys(): - metadata_list.append({'id':id, 'value':metadata_dict['value'][order]}) - - return metadata_list - - -def save_metadata_list(metadata_list, document): - for item in metadata_list: - if item['value']: - save_metadata(item, document) - else: - try: - metadata_type = MetadataType.objects.get(id=item['id']) - document_metadata = DocumentMetadata.objects.get(document=document, - metadata_type=metadata_type) - document_metadata.delete() - except ObjectDoesNotExist: - pass - - -def save_metadata(metadata_dict, document): - #Use matched metadata now to create document metadata - document_metadata, created = DocumentMetadata.objects.get_or_create( - document=document, - metadata_type=get_object_or_404(MetadataType, pk=metadata_dict['id']), - ) - #Handle 'plus sign as space' in the url - - #unquote_plus handles utf-8?!? - #http://stackoverflow.com/questions/4382875/handling-iri-in-django - document_metadata.value=unquote_plus(metadata_dict['value'])#.decode('utf-8') - document_metadata.save() diff --git a/apps/documents/views.py b/apps/documents/views.py index fe887c8f60..a9bb93e6f9 100755 --- a/apps/documents/views.py +++ b/apps/documents/views.py @@ -16,26 +16,14 @@ from django.core.exceptions import ObjectDoesNotExist from django.core.files.uploadedfile import SimpleUploadedFile from common.utils import pretty_size -from permissions.api import check_permissions, Unauthorized -from filetransfers.api import serve_file from converter.api import convert, in_image_cache, QUALITY_DEFAULT from converter import TRANFORMATION_CHOICES +from filetransfers.api import serve_file from filesystem_serving.api import document_create_fs_links, document_delete_fs_links - - -from utils import from_descriptor_to_tempfile - -from models import Document, DocumentMetadata, DocumentType, MetadataType, \ - DocumentPage, DocumentPageTransformation - -from forms import DocumentTypeSelectForm, 
DocumentCreateWizard, \ - MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \ - StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \ - MetadataFormSet, DocumentPageForm, DocumentPageTransformationForm - -from staging import StagingFile - +from filesystem_serving.conf.settings import FILESERVING_ENABLE from ocr.models import add_document_to_queue +from permissions.api import check_permissions, Unauthorized + from documents.conf.settings import DELETE_STAGING_FILE_AFTER_UPLOAD from documents.conf.settings import USE_STAGING_DIRECTORY @@ -49,16 +37,23 @@ from documents.conf.settings import AUTOMATIC_OCR from documents.conf.settings import UNCOMPRESS_COMPRESSED_LOCAL_FILES from documents.conf.settings import UNCOMPRESS_COMPRESSED_STAGING_FILES -from filesystem_serving.conf.settings import FILESERVING_ENABLE - - from documents import PERMISSION_DOCUMENT_CREATE, \ PERMISSION_DOCUMENT_CREATE, PERMISSION_DOCUMENT_PROPERTIES_EDIT, \ PERMISSION_DOCUMENT_METADATA_EDIT, PERMISSION_DOCUMENT_VIEW, \ PERMISSION_DOCUMENT_DELETE, PERMISSION_DOCUMENT_DOWNLOAD, \ PERMISSION_DOCUMENT_TRANSFORM, PERMISSION_DOCUMENT_TOOLS + +from forms import DocumentTypeSelectForm, DocumentCreateWizard, \ + MetadataForm, DocumentForm, DocumentForm_edit, DocumentForm_view, \ + StagingDocumentForm, DocumentTypeMetadataType, DocumentPreviewForm, \ + MetadataFormSet, DocumentPageForm, DocumentPageTransformationForm -from utils import save_metadata, save_metadata_list, decode_metadata_from_url +from metadata import save_metadata, save_metadata_list, decode_metadata_from_url +from models import Document, DocumentMetadata, DocumentType, MetadataType, \ + DocumentPage, DocumentPageTransformation +from staging import StagingFile +from utils import document_save_to_temp_dir + def document_list(request): permissions = [PERMISSION_DOCUMENT_VIEW] @@ -249,7 +244,7 @@ def upload_document_with_type(request, document_type_id, multiple=True): return 
render_to_response('generic_form.html', context, context_instance=RequestContext(request)) - + def document_view(request, document_id): permissions = [PERMISSION_DOCUMENT_VIEW] try: @@ -503,10 +498,11 @@ def get_document_image(request, document_id, size=PREVIEW_SIZE, quality=QUALITY_ if filepath: return serve_file(request, File(file=open(filepath, 'r'))) #Save to a temporary location - filepath = from_descriptor_to_tempfile(document.open(), document.checksum) + filepath = document_save_to_temp_dir(document, filename=document.checksum) output_file = convert(filepath, size=size, format='jpg', quality=quality, extra_options=tranformation_string, page=page-1) return serve_file(request, File(file=open(output_file, 'r')), content_type='image/jpeg') except Exception, e: + #messages.error(request, e) if size == THUMBNAIL_SIZE: return serve_file(request, File(file=open('%simages/picture_error.png' % settings.MEDIA_ROOT, 'r'))) else: diff --git a/apps/main/views.py b/apps/main/views.py index 76333be0e8..62c9fc14dd 100755 --- a/apps/main/views.py +++ b/apps/main/views.py @@ -1,10 +1,11 @@ -import os import types from django.shortcuts import render_to_response from django.template import RequestContext from django.utils.translation import ugettext_lazy as _ +from common.utils import exists_with_famfam + from common.conf import settings as common_settings from documents.conf import settings as documents_settings from converter.conf import settings as converter_settings @@ -68,7 +69,7 @@ def check_settings(request): 'extra_columns':[ {'name':_(u'name'), 'attribute':'name'}, {'name':_(u'value'), 'attribute': lambda x: _return_type(x['value'])}, - {'name':_(u'exists'), 'attribute':lambda x: _exists(x['value']) if 'exists' in x else ''}, + {'name':_(u'exists'), 'attribute':lambda x: exists_with_famfam(x['value']) if 'exists' in x else ''}, ] } @@ -88,15 +89,6 @@ def _return_type(value): else: return value -def _exists(path): - try: - if os.path.exists(path): - return '' - else: - 
class GridFSStorage(Storage):
    """Django storage backend that keeps file contents in MongoDB GridFS.

    The connection uses the module-level ``HOST``, ``PORT`` and
    ``DATABASE_NAME`` constants.
    """

    def __init__(self, *args, **kwargs):
        self.db = Connection(host=HOST, port=PORT)[DATABASE_NAME]
        self.fs = GridFS(self.db)

    def save(self, name, content):
        """Store ``content`` under ``name`` and return ``name``.

        Content exposing ``temporary_file_path`` is moved in from that
        path; anything else is streamed chunk by chunk.  Errors propagate
        to the caller.
        """
        #TODO: if exists adding _ plus a counter
        if hasattr(content, 'temporary_file_path'):
            # This file has a file path that we can move.
            self.move(content.temporary_file_path(), name)
            content.close()
        else:
            # This is a normal uploadedfile that we can stream.
            newfile = self.fs.new_file(filename=name)
            try:
                for chunk in content.chunks():
                    newfile.write(chunk)
            finally:
                newfile.close()

        return name

    def open(self, name, *args, **kwargs):
        """Return the most recent GridFS version stored under ``name``."""
        return self.fs.get_last_version(name)

    def delete(self, name):
        """Delete the most recent version stored under ``name``.

        The file is looked up by name and removed through its GridFS id.
        Returns ``True``.  NOTE(review): ``get_last_version`` presumably
        raises when no such file exists -- confirm whether callers expect
        a silent no-op instead.
        """
        grid_file = self.fs.get_last_version(name)
        # GridFS.delete() takes the file id; calling self.delete() here
        # would recurse without end.
        self.fs.delete(grid_file._id)
        return True

    def exists(self, name):
        """Tell whether a file named ``name`` is stored in GridFS."""
        return self.fs.exists(filename=name)

    def path(self, name):
        """Return ``name`` as unicode; no path translation is applied."""
        return force_unicode(name)

    def move(self, old_file_name, name, chunk_size=1024*64):
        """Copy the local file ``old_file_name`` into GridFS as ``name``,
        then remove the local file.
        """
        # first open the old file, so that it won't go away
        old_file = open(old_file_name, 'rb')
        try:
            newfile = self.fs.new_file(filename=name)
            try:
                while True:
                    current_chunk = old_file.read(chunk_size)
                    # An empty read means end of file; testing truthiness
                    # works for both str and bytes.
                    if not current_chunk:
                        break
                    newfile.write(current_chunk)
            finally:
                newfile.close()
        finally:
            old_file.close()

        try:
            os.remove(old_file_name)
        except OSError as e:
            # Certain operating systems (Cygwin and Windows)
            # fail when deleting opened files, ignore it. (For the
            # systems where this happens, temporary files will be auto-deleted
            # on close anyway.)
            if getattr(e, 'winerror', 0) != 32 and getattr(e, 'errno', 0) != 13:
                raise