Finish refactor of the new compressed file class support. Closes GitLab issue #7.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2017-05-24 19:50:14 -04:00
parent 61d2cc3d03
commit e5bb00d514
16 changed files with 282 additions and 101 deletions

View File

@@ -127,6 +127,8 @@
- Remove usage of pace.js. Would cause XMLRequest to fallback to - Remove usage of pace.js. Would cause XMLRequest to fallback to
synchronous mode. synchronous mode.
- Add custom AJAX spinner. - Add custom AJAX spinner.
- Complete refactor of the compressed archive class support. Closes
GitLab issue #7.
3.0.3 (2018-08-17) 3.0.3 (2018-08-17)
================== ==================

View File

@@ -424,6 +424,7 @@ must be changed::
Bugs fixed or issues closed Bugs fixed or issues closed
=========================== ===========================
* `GitLab issue #7 <https://gitlab.com/mayan-edms/mayan-edms/issues/7>`_ Feature: other compressors than zip for compressed documents
* `GitLab issue #259 <https://gitlab.com/mayan-edms/mayan-edms/issues/259>`_ Thumbnails: why are they created on the fly (therefore: not cached) * `GitLab issue #259 <https://gitlab.com/mayan-edms/mayan-edms/issues/259>`_ Thumbnails: why are they created on the fly (therefore: not cached)
.. _PyPI: https://pypi.python.org/pypi/mayan-edms/ .. _PyPI: https://pypi.python.org/pypi/mayan-edms/

View File

@@ -1,6 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from io import BytesIO from io import BytesIO
import tarfile
import zipfile import zipfile
try: try:
@@ -11,98 +12,154 @@ except ImportError:
from django.core.files.uploadedfile import SimpleUploadedFile from django.core.files.uploadedfile import SimpleUploadedFile
from mimetype.api import get_mimetype
class NotACompressedFile(Exception): from .exceptions import NoMIMETypeMatch
pass
class CompressedFile(object): class Archive(object):
def __init__(self, file_input=None): _registry = {}
if file_input:
try:
# Is it a file like object?
file_input.seek(0)
except AttributeError:
# If not, try open it.
self._open(file_input)
else:
self.file_object = file_input
else:
self._create()
def _create(self): @classmethod
self.descriptor = BytesIO() def register(cls, mime_types, archive_classes):
self.zf = zipfile.ZipFile(self.descriptor, mode='w') for mime_type in mime_types:
for archive_class in archive_classes:
cls._registry.setdefault(
mime_type, []
).append(archive_class)
def _open(self, file_input): @classmethod
try: def open(cls, file_object):
# Is it a file like object? mime_type = get_mimetype(
file_input.seek(0) file_object=file_object, mimetype_only=True
except AttributeError: )[0]
# If not, try open it.
self.descriptor = open(file_input, 'r+b')
else:
self.descriptor = file_input
try: try:
test = zipfile.ZipFile(self.descriptor, mode='r') for archive_class in cls._registry[mime_type]:
except zipfile.BadZipfile: instance = archive_class()
raise NotACompressedFile instance._open(file_object=file_object)
else: return instance
test.close() except KeyError:
self.descriptor.seek(0) raise NoMIMETypeMatch
self.zf = zipfile.ZipFile(self.descriptor, mode='a')
def add_file(self, file_input, arcname=None): def _open(self, file_object):
try: raise NotImplementedError
# Is it a file like object?
file_input.seek(0)
except AttributeError:
# If not, keep it
self.zf.write(
file_input, arcname=arcname, compress_type=COMPRESSION
)
else:
self.zf.writestr(arcname, file_input.read())
def contents(self): def add_file(self, file_object, filename):
"""
Add a file as a member of an archive
"""
raise NotImplementedError
def close(self):
    """
    Close the archive object stored in `self._archive`
    """
    archive = self._archive
    archive.close()
def create(self):
    """
    Start a new, empty archive. This base implementation only raises
    NotImplementedError.
    """
    raise NotImplementedError()
def get_members(self):
    """
    Return a generator of SimpleUploadedFile instances, one per name
    returned by `members()`. The content of each member is obtained
    from `member_contents()` as the generator is consumed.
    """
    member_names = self.members()
    return (
        SimpleUploadedFile(
            name=member_name, content=self.member_contents(member_name)
        ) for member_name in member_names
    )
def member_contents(self, filename):
    """
    Return the content of the member named `filename`. This base
    implementation only raises NotImplementedError.
    """
    raise NotImplementedError()
def members(self):
    """
    Return a list with the names of the elements inside the archive.
    This base implementation only raises NotImplementedError.
    """
    raise NotImplementedError()
def open_member(self, filename):
    """
    Return a file-like object to a member of the archive. This base
    implementation only raises NotImplementedError.
    """
    # NotImplemented is a comparison sentinel, not an exception; raising
    # it produces a TypeError instead of the intended error.
    raise NotImplementedError
class TarArchive(Archive):
    """
    Archive implementation backed by the standard library `tarfile`
    module.
    """

    def _open(self, file_object):
        """
        Open an existing tar archive from the file-like object
        `file_object`.
        """
        self._archive = tarfile.open(fileobj=file_object)

    def add_file(self, file_object, filename):
        """
        Add the content of `file_object` to the archive as a member
        named `filename`. The content is read into memory in full.
        """
        content = file_object.read()
        # TarFile.addfile() names the member after `tar_info.name` and
        # copies exactly `tar_info.size` bytes from fileobj. A bare
        # TarInfo() has an empty name and a size of 0, which stores a
        # nameless member without any data.
        tar_info = tarfile.TarInfo(name=filename)
        tar_info.size = len(content)
        self._archive.addfile(tar_info, fileobj=BytesIO(content))

    def create(self):
        """
        Create an empty archive that is written to an in-memory buffer,
        kept in `self.string_buffer`.
        """
        self.string_buffer = BytesIO()
        self._archive = tarfile.TarFile(fileobj=self.string_buffer, mode='w')

    def member_contents(self, filename):
        """
        Return the content of the member named `filename`.
        """
        return self._archive.extractfile(filename).read()

    def members(self):
        """
        Return a list with the names of the members of the archive,
        leaving out directory entries.
        """
        # extractfile() returns None for directories, so listing them
        # would break member_contents() and open_member() callers.
        return [
            member.name for member in self._archive.getmembers()
            if not member.isdir()
        ]

    def open_member(self, filename):
        """
        Return a file-like object to the member named `filename`.
        """
        return self._archive.extractfile(filename)
class ZipArchive(Archive):
def _open(self, file_object):
    """
    Wrap `file_object` in a zipfile.ZipFile and keep it as the archive
    """
    archive = zipfile.ZipFile(file_object)
    self._archive = archive
def add_file(self, file_object, filename):
    """
    Add the content of `file_object` to the archive as a member named
    `filename`, using COMPRESSION as the compression type.
    """
    # The name and the data are passed positionally: the second
    # parameter of ZipFile.writestr() is called `bytes` in Python 2 but
    # `data` in Python 3, where the `bytes=` keyword raises TypeError.
    self._archive.writestr(
        filename, file_object.read(), compress_type=COMPRESSION
    )
def create(self):
    """
    Start a new, empty ZIP archive that is written to an in-memory
    buffer, kept in `self.string_buffer`
    """
    buffer = BytesIO()
    self.string_buffer = buffer
    self._archive = zipfile.ZipFile(buffer, mode='w')
def member_contents(self, filename):
    """
    Read and return the content of the member named `filename`
    """
    content = self._archive.read(filename)
    return content
def members(self):
return [ return [
filename for filename in self.zf.namelist() if not filename.endswith('/') filename for filename in self._archive.namelist() if not filename.endswith('/')
] ]
def get_content(self, filename): def open_member(self, filename):
return self.zf.read(filename) return self._archive.open(filename)
def write(self, filename=None): def write(self, filename=None):
# fix for Linux zip files read in Windows # fix for Linux zip files read in Windows
for file in self.zf.filelist: for entry in self._archive.filelist:
file.create_system = 0 entry.create_system = 0
self.descriptor.seek(0) self.string_buffer.seek(0)
if filename: if filename:
descriptor = open(filename, 'w') with open(filename, 'w') as file_object:
descriptor.write(self.descriptor.read()) file_object.write(self.string_buffer.read())
else: else:
return self.descriptor return self.string_buffer
def as_file(self, filename): def as_file(self, filename):
return SimpleUploadedFile(name=filename, content=self.write().read()) return SimpleUploadedFile(name=filename, content=self.write().read())
def children(self):
try:
# Try for a ZIP file
zfobj = zipfile.ZipFile(self.file_object)
filenames = [
filename for filename in zfobj.namelist() if not filename.endswith('/')
]
return (
SimpleUploadedFile(
name=filename, content=zfobj.read(filename)
) for filename in filenames
)
except zipfile.BadZipfile:
raise NotACompressedFile
def close(self): Archive.register(
self.zf.close() mime_types=('application/zip',), archive_classes=(ZipArchive,)
)
Archive.register(
mime_types=('application/x-tar',), archive_classes=(TarArchive,)
)
Archive.register(
mime_types=('application/gzip',), archive_classes=(TarArchive,)
)
Archive.register(
mime_types=('application/x-bzip2',), archive_classes=(TarArchive,)
)

View File

@@ -15,6 +15,19 @@ class ActionError(BaseCommonException):
it is used to announce that one item in the queryset failed to process. it is used to announce that one item in the queryset failed to process.
""" """
class CompressionFileError(BaseCommonException):
    """
    Base class for the exceptions of the compressed file classes
    """
class NoMIMETypeMatch(CompressionFileError):
    """
    Raised when no decompressor is registered for the MIME type of a
    file
    """
class NotLatestVersion(BaseCommonException): class NotLatestVersion(BaseCommonException):
""" """

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1 @@
TEST FILE 1

View File

@@ -0,0 +1 @@
TEST FILE 2

View File

@@ -0,0 +1 @@
TEST FILE 3

View File

@@ -1,5 +1,38 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import os
from django.conf import settings
TEST_ERROR_LOG_ENTRY_RESULT = 'test_error_log_entry_result_text' TEST_ERROR_LOG_ENTRY_RESULT = 'test_error_log_entry_result_text'
TEST_VIEW_NAME = 'test view name' TEST_VIEW_NAME = 'test view name'
TEST_VIEW_URL = 'test-view-url' TEST_VIEW_URL = 'test-view-url'
# Filenames
TEST_FILENAME1 = 'test_file1.txt'
TEST_FILENAME2 = 'test_file2.txt'
TEST_FILENAME3 = 'test_file3.txt'
TEST_TAR_BZ2_FILE = 'test_file.tar.bz2'
TEST_TAR_FILE = 'test_file.tar'
TEST_TAR_GZ_FILE = 'test_file.tar.gz'
TEST_ZIP_FILE = 'test_file.zip'

# Expected file contents
TEST_FILE_CONTENTS_1 = 'TEST FILE 1\n'
TEST_FILE_CONTENTS_2 = 'TEST FILE 2\n'

# Member names the tests expect to find inside each test archive
TEST_COMPRESSED_FILE_CONTENTS = [TEST_FILENAME1, TEST_FILENAME2]

# File paths. Every test file is stored in the same directory.
_TEST_CONTRIB_PATH = os.path.join(
    settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib'
)
TEST_FILE3_PATH = os.path.join(_TEST_CONTRIB_PATH, TEST_FILENAME3)
TEST_TAR_BZ2_FILE_PATH = os.path.join(_TEST_CONTRIB_PATH, TEST_TAR_BZ2_FILE)
TEST_TAR_FILE_PATH = os.path.join(_TEST_CONTRIB_PATH, TEST_TAR_FILE)
TEST_TAR_GZ_FILE_PATH = os.path.join(_TEST_CONTRIB_PATH, TEST_TAR_GZ_FILE)
TEST_ZIP_FILE_PATH = os.path.join(_TEST_CONTRIB_PATH, TEST_ZIP_FILE)

View File

@@ -0,0 +1,70 @@
from __future__ import unicode_literals
from common.tests import BaseTestCase
from django.test import override_settings
from ..compressed_files import Archive, TarArchive, ZipArchive
from .literals import (
TEST_COMPRESSED_FILE_CONTENTS, TEST_FILE_CONTENTS_1, TEST_FILE_CONTENTS_2,
TEST_FILE3_PATH, TEST_FILENAME1, TEST_FILENAME2, TEST_FILENAME3,
TEST_TAR_BZ2_FILE_PATH, TEST_TAR_FILE_PATH, TEST_TAR_GZ_FILE_PATH,
TEST_ZIP_FILE_PATH
)
class TarArchiveClassTestCase(BaseTestCase):
    """
    Tests of the archive classes, run here against an uncompressed tar
    file. Subclasses repeat the same tests for other formats by
    overriding `archive_path` and `cls`.
    """
    archive_path = TEST_TAR_FILE_PATH
    cls = TarArchive
    filename = TEST_FILENAME3
    file_path = TEST_FILE3_PATH
    members_list = TEST_COMPRESSED_FILE_CONTENTS
    member_name = TEST_FILENAME1
    member_contents = TEST_FILE_CONTENTS_1

    def test_add_file(self):
        archive = self.cls()
        archive.create()
        # Archives store bytes, so the source file is read in binary mode
        with open(self.file_path, mode='rb') as file_object:
            archive.add_file(file_object=file_object, filename=self.filename)
            # assertTrue() would take the expected list as the failure
            # message and pass for any non empty result; compare instead.
            self.assertEqual(archive.members(), [self.filename])

    def test_open(self):
        with open(self.archive_path, mode='rb') as file_object:
            archive = Archive.open(file_object=file_object)
            self.assertIsInstance(archive, self.cls)

    def test_members(self):
        with open(self.archive_path, mode='rb') as file_object:
            archive = Archive.open(file_object=file_object)
            self.assertEqual(archive.members(), self.members_list)

    def test_member_contents(self):
        with open(self.archive_path, mode='rb') as file_object:
            archive = Archive.open(file_object=file_object)
            self.assertEqual(
                archive.member_contents(filename=self.member_name),
                self.member_contents
            )

    def test_open_member(self):
        with open(self.archive_path, mode='rb') as file_object:
            archive = Archive.open(file_object=file_object)
            # Separate name to avoid rebinding the archive file object
            # that the `with` statement is managing
            member_object = archive.open_member(filename=self.member_name)
            self.assertEqual(
                member_object.read(), self.member_contents
            )
class ZipArchiveClassTestCase(TarArchiveClassTestCase):
    """
    Run the inherited archive tests against the ZIP test file
    """
    cls = ZipArchive
    archive_path = TEST_ZIP_FILE_PATH
class TarGzArchiveClassTestCase(TarArchiveClassTestCase):
    """
    Run the inherited archive tests against the gzip compressed tar
    test file
    """
    cls = TarArchive
    archive_path = TEST_TAR_GZ_FILE_PATH
class TarBz2ArchiveClassTestCase(TarArchiveClassTestCase):
    """
    Run the inherited archive tests against the bzip2 compressed tar
    test file
    """
    cls = TarArchive
    archive_path = TEST_TAR_BZ2_FILE_PATH

View File

@@ -11,7 +11,7 @@ from django.utils.http import urlencode
from django.utils.translation import ugettext_lazy as _, ungettext from django.utils.translation import ugettext_lazy as _, ungettext
from acls.models import AccessControlList from acls.models import AccessControlList
from common.compressed_files import CompressedFile from common.compressed_files import ZipArchive
from common.exceptions import ActionError from common.exceptions import ActionError
from common.generics import ( from common.generics import (
ConfirmView, FormView, MultipleObjectConfirmActionView, ConfirmView, FormView, MultipleObjectConfirmActionView,
@@ -553,22 +553,22 @@ class DocumentDownloadView(SingleObjectDownloadView):
) )
if self.request.GET.get('compressed') == 'True' or queryset.count() > 1: if self.request.GET.get('compressed') == 'True' or queryset.count() > 1:
compressed_file = CompressedFile() compressed_file = ZipArchive()
compressed_file.create()
for item in queryset: for item in queryset:
descriptor = DocumentDownloadView.get_item_file(item=item) with DocumentDownloadView.get_item_file(item=item) as file_object:
compressed_file.add_file( compressed_file.add_file(
descriptor, arcname=self.get_item_label(item=item) file_object=file_object,
) filename=self.get_item_label(item=item)
descriptor.close() )
DocumentDownloadView.commit_event( DocumentDownloadView.commit_event(
item=item, request=self.request item=item, request=self.request
) )
compressed_file.close() compressed_file.close()
return DocumentDownloadView.VirtualFile( return DocumentDownloadView.VirtualFile(
compressed_file.as_file(zip_filename), compressed_file.as_file(zip_filename), name=zip_filename
name=zip_filename
) )
else: else:
item = queryset.first() item = queryset.first()

View File

@@ -21,7 +21,8 @@ from django.utils.translation import ugettext_lazy as _
from model_utils.managers import InheritanceManager from model_utils.managers import InheritanceManager
from common.compressed_files import CompressedFile, NotACompressedFile from common.compressed_files import Archive
from common.exceptions import NoMIMETypeMatch
from common.utils import TemporaryFile from common.utils import TemporaryFile
from converter.models import Transformation from converter.models import Transformation
from djcelery.models import PeriodicTask, IntervalSchedule from djcelery.models import PeriodicTask, IntervalSchedule
@@ -88,24 +89,24 @@ class Source(models.Model):
kwargs = { kwargs = {
'description': description, 'document_type': document_type, 'description': description, 'document_type': document_type,
'label': label, 'language': language, 'label': label, 'language': language, 'user': user
'user': user
} }
if expand: if expand:
try: try:
compressed_file = CompressedFile(file_object) compressed_file = Archive.open(file_object=file_object)
for compressed_file_child in compressed_file.children(): for compressed_file_child in compressed_file.members():
kwargs.update({'label': force_text(compressed_file_child)}) with compressed_file.open_member(filename=compressed_file_child) as file_object:
documents.append( kwargs.update(
self.upload_document( {'label': force_text(compressed_file_child)}
file_object=File(compressed_file_child), **kwargs
) )
) documents.append(
compressed_file_child.close() self.upload_document(
file_object=file_object, **kwargs
except NotACompressedFile: )
logging.debug('Exception: NotACompressedFile') )
except NoMIMETypeMatch:
logging.debug('Exception: NoMIMETypeMatch')
documents.append( documents.append(
self.upload_document(file_object=file_object, **kwargs) self.upload_document(file_object=file_object, **kwargs)
) )

View File

@@ -9,7 +9,8 @@ from django.utils.translation import ugettext_lazy as _
from mayan.celery import app from mayan.celery import app
from common.compressed_files import CompressedFile, NotACompressedFile from common.compressed_files import Archive
from common.exceptions import NoMIMETypeMatch
from lock_manager import LockError from lock_manager import LockError
from lock_manager.runtime import locking_backend from lock_manager.runtime import locking_backend
@@ -138,7 +139,7 @@ def task_source_handle_upload(self, document_type_id, shared_uploaded_file_id, s
with shared_upload.open() as file_object: with shared_upload.open() as file_object:
if expand: if expand:
try: try:
compressed_file = CompressedFile(file_object) compressed_file = Archive.open(file_object=file_object)
for compressed_file_child in compressed_file.children(): for compressed_file_child in compressed_file.children():
# TODO: find way to uniquely identify child files # TODO: find way to uniquely identify child files
# Use filename in the mean time. # Use filename in the mean time.
@@ -187,8 +188,8 @@ def task_source_handle_upload(self, document_type_id, shared_uploaded_file_id, s
'upload file: %s; %s. Retrying.', shared_upload, 'upload file: %s; %s. Retrying.', shared_upload,
exception exception
) )
except NotACompressedFile: except NoMIMETypeMatch:
logging.debug('Exception: NotACompressedFile') logging.debug('Exception: NoMIMETypeMatch')
task_upload_document.delay( task_upload_document.delay(
shared_uploaded_file_id=shared_upload.pk, **kwargs shared_uploaded_file_id=shared_upload.pk, **kwargs
) )