Finish refactor of the new compressed file class support. Closes GitLab issue #7.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2017-05-24 19:50:14 -04:00
parent 61d2cc3d03
commit e5bb00d514
16 changed files with 282 additions and 101 deletions

View File

@@ -127,6 +127,8 @@
- Remove usage of pace.js. It would cause XMLRequest to fall back to
synchronous mode.
- Add custom AJAX spinner.
- Complete refactor of the compressed archive class support. Closes
GitLab issue #7.
3.0.3 (2018-08-17)
==================

View File

@@ -424,6 +424,7 @@ must be changed::
Bugs fixed or issues closed
===========================
* `GitLab issue #7 <https://gitlab.com/mayan-edms/mayan-edms/issues/7>`_ Feature: other compressors than zip for compressed documents
* `GitLab issue #259 <https://gitlab.com/mayan-edms/mayan-edms/issues/259>`_ Thumbnails: why are they created on the fly (therefore: not cached)
.. _PyPI: https://pypi.python.org/pypi/mayan-edms/

View File

@@ -1,6 +1,7 @@
from __future__ import unicode_literals
from io import BytesIO
import tarfile
import zipfile
try:
@@ -11,98 +12,154 @@ except ImportError:
from django.core.files.uploadedfile import SimpleUploadedFile
from mimetype.api import get_mimetype
class NotACompressedFile(Exception):
    """
    Signals that the supplied file could not be read as a ZIP archive.
    """
from .exceptions import NoMIMETypeMatch
class CompressedFile(object):
def __init__(self, file_input=None):
if file_input:
try:
# Is it a file like object?
file_input.seek(0)
except AttributeError:
# If not, try open it.
self._open(file_input)
else:
self.file_object = file_input
else:
self._create()
class Archive(object):
_registry = {}
def _create(self):
self.descriptor = BytesIO()
self.zf = zipfile.ZipFile(self.descriptor, mode='w')
@classmethod
def register(cls, mime_types, archive_classes):
    """
    Associate each class in archive_classes with every MIME type in
    mime_types. Classes are appended, so earlier registrations for the
    same MIME type are kept and stay first in the list.
    """
    for mime_type in mime_types:
        for archive_class in archive_classes:
            # The list is created lazily, only when there is at least one
            # class to store for this MIME type.
            handlers = cls._registry.setdefault(mime_type, [])
            handlers.append(archive_class)
def _open(self, file_input):
try:
# Is it a file like object?
file_input.seek(0)
except AttributeError:
# If not, try open it.
self.descriptor = open(file_input, 'r+b')
else:
self.descriptor = file_input
@classmethod
def open(cls, file_object):
mime_type = get_mimetype(
file_object=file_object, mimetype_only=True
)[0]
try:
test = zipfile.ZipFile(self.descriptor, mode='r')
except zipfile.BadZipfile:
raise NotACompressedFile
else:
test.close()
self.descriptor.seek(0)
self.zf = zipfile.ZipFile(self.descriptor, mode='a')
for archive_class in cls._registry[mime_type]:
instance = archive_class()
instance._open(file_object=file_object)
return instance
except KeyError:
raise NoMIMETypeMatch
def add_file(self, file_input, arcname=None):
try:
# Is it a file like object?
file_input.seek(0)
except AttributeError:
# If not, keep it
self.zf.write(
file_input, arcname=arcname, compress_type=COMPRESSION
)
else:
self.zf.writestr(arcname, file_input.read())
def _open(self, file_object):
    """
    Bind this instance to the existing archive found in file_object.
    Every concrete archive class has to provide its own version.
    """
    raise NotImplementedError
def contents(self):
def add_file(self, file_object, filename):
    """
    Store the content of file_object in the archive as a member
    called filename. To be implemented by each archive class.
    """
    raise NotImplementedError
def close(self):
self._archive.close()
def create(self):
    """
    Start a new archive with no members. To be implemented by each
    archive class.
    """
    raise NotImplementedError
def get_members(self):
    """
    Return a generator that yields one SimpleUploadedFile per archive
    member, named after the member and holding its full content.
    """
    # The member list is obtained right away; the content of each member
    # is only read when the generator reaches it.
    member_names = self.members()
    return (
        SimpleUploadedFile(
            name=member_name, content=self.member_contents(member_name)
        ) for member_name in member_names
    )
def member_contents(self, filename):
    """
    Return the complete content of the member called filename. To be
    implemented by each archive class.
    """
    raise NotImplementedError
def members(self):
    """
    Return a list with the names of the elements stored in the archive.
    To be implemented by each archive class.
    """
    raise NotImplementedError
def open_member(self, filename):
    """
    Return a file-like object to a member of the archive. To be
    implemented by each archive class.

    Raises NotImplementedError when the archive class does not override
    this method.
    """
    # NotImplemented is a comparison sentinel, not an exception; raising
    # it produces a TypeError instead of the intended error.
    raise NotImplementedError
class TarArchive(Archive):
    """
    Archive implementation built on the standard library tarfile module.
    """

    def _open(self, file_object):
        """
        Open the existing tar archive contained in file_object.
        """
        # No mode is given: tarfile's default read mode detects gzip and
        # bzip2 compression transparently.
        self._archive = tarfile.open(fileobj=file_object)

    def add_file(self, file_object, filename):
        """
        Add the content of file_object as a member called filename.

        The data from the current position of file_object up to its end
        is stored. file_object must be seekable and opened in binary mode.
        """
        # tarfile copies exactly TarInfo.size bytes, so the size has to be
        # known before calling addfile(); measure it by seeking to the end.
        start = file_object.tell()
        file_object.seek(0, 2)  # 2 == seek relative to the end of file
        size = file_object.tell() - start
        file_object.seek(start)

        tar_info = tarfile.TarInfo(name=filename)
        tar_info.size = size
        self._archive.addfile(tarinfo=tar_info, fileobj=file_object)

    def create(self):
        """
        Create an empty tar archive held in an in-memory buffer.
        """
        self.string_buffer = BytesIO()
        self._archive = tarfile.TarFile(fileobj=self.string_buffer, mode='w')

    def member_contents(self, filename):
        """
        Return the complete content of the member called filename.
        """
        return self._archive.extractfile(filename).read()

    def members(self):
        """
        Return the names of the regular files stored in the archive.

        Directories and other special entries are left out, in line with
        ZipArchive.members(): extractfile() returns None for them, so they
        cannot be read as members.
        """
        return [
            tar_info.name for tar_info in self._archive.getmembers()
            if tar_info.isfile()
        ]

    def open_member(self, filename):
        """
        Return a file-like object to read the member called filename.
        """
        return self._archive.extractfile(filename)
class ZipArchive(Archive):
def _open(self, file_object):
    """
    Open the existing ZIP archive contained in file_object.
    """
    self._archive = zipfile.ZipFile(file=file_object)
def add_file(self, file_object, filename):
    """
    Add the content of file_object as a member called filename.

    The remaining content of file_object is read into memory and stored
    using the module level COMPRESSION method.
    """
    # The name and data are passed positionally: the second parameter of
    # ZipFile.writestr() is called `bytes` on Python 2 but `data` on
    # Python 3, so naming it breaks on one of the two.
    self._archive.writestr(
        filename, file_object.read(), compress_type=COMPRESSION
    )
def create(self):
    """
    Create an empty ZIP archive held in an in-memory buffer.
    """
    buffer_object = BytesIO()
    self.string_buffer = buffer_object
    self._archive = zipfile.ZipFile(file=buffer_object, mode='w')
def member_contents(self, filename):
    """
    Return the complete content of the member called filename.
    """
    content = self._archive.read(name=filename)
    return content
def members(self):
return [
filename for filename in self.zf.namelist() if not filename.endswith('/')
filename for filename in self._archive.namelist() if not filename.endswith('/')
]
def get_content(self, filename):
return self.zf.read(filename)
def open_member(self, filename):
    """
    Return a file-like object to read the member called filename.
    """
    member_file = self._archive.open(name=filename)
    return member_file
def write(self, filename=None):
# fix for Linux zip files read in Windows
for file in self.zf.filelist:
file.create_system = 0
for entry in self._archive.filelist:
entry.create_system = 0
self.descriptor.seek(0)
self.string_buffer.seek(0)
if filename:
descriptor = open(filename, 'w')
descriptor.write(self.descriptor.read())
with open(filename, 'w') as file_object:
file_object.write(self.string_buffer.read())
else:
return self.descriptor
return self.string_buffer
def as_file(self, filename):
    """
    Return the archive as a SimpleUploadedFile called filename.
    """
    # write() without a filename hands back the rewound in-memory buffer.
    buffer_object = self.write()
    payload = buffer_object.read()
    return SimpleUploadedFile(name=filename, content=payload)
def children(self):
try:
# Try for a ZIP file
zfobj = zipfile.ZipFile(self.file_object)
filenames = [
filename for filename in zfobj.namelist() if not filename.endswith('/')
]
return (
SimpleUploadedFile(
name=filename, content=zfobj.read(filename)
) for filename in filenames
)
except zipfile.BadZipfile:
raise NotACompressedFile
def close(self):
self.zf.close()
# ZIP containers.
Archive.register(
    archive_classes=(ZipArchive,), mime_types=('application/zip',)
)
# Tar containers: the plain tar, gzip and bzip2 MIME types all map to
# TarArchive.
Archive.register(
    archive_classes=(TarArchive,),
    mime_types=(
        'application/x-tar', 'application/gzip', 'application/x-bzip2'
    )
)

View File

@@ -15,6 +15,19 @@ class ActionError(BaseCommonException):
it is used to announce that one item in the queryset failed to process.
"""
class CompressionFileError(BaseCommonException):
    """
    Root of the exceptions raised by the compressed file classes.
    """
class NoMIMETypeMatch(CompressionFileError):
    """
    Raised when no archive class is registered for the MIME type of a
    file.
    """
class NotLatestVersion(BaseCommonException):
"""

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1 @@
TEST FILE 1

View File

@@ -0,0 +1 @@
TEST FILE 2

View File

@@ -0,0 +1 @@
TEST FILE 3

View File

@@ -1,5 +1,38 @@
from __future__ import unicode_literals
import os
from django.conf import settings
TEST_ERROR_LOG_ENTRY_RESULT = 'test_error_log_entry_result_text'
TEST_VIEW_NAME = 'test view name'
TEST_VIEW_URL = 'test-view-url'
# Filenames
# Names of the plain text fixtures and of the archive fixtures.
TEST_FILENAME1 = 'test_file1.txt'
TEST_FILENAME2 = 'test_file2.txt'
TEST_FILENAME3 = 'test_file3.txt'
# Expected content of the first two text fixtures, trailing newline included.
TEST_FILE_CONTENTS_1 = 'TEST FILE 1\n'
TEST_FILE_CONTENTS_2 = 'TEST FILE 2\n'
TEST_TAR_BZ2_FILE = 'test_file.tar.bz2'
TEST_TAR_FILE = 'test_file.tar'
TEST_TAR_GZ_FILE = 'test_file.tar.gz'
TEST_ZIP_FILE = 'test_file.zip'
# Member names the tests expect to find inside an archive fixture.
# NOTE(review): presumably every archive fixture holds exactly these two
# members, in this order - confirm against the binary fixtures.
TEST_COMPRESSED_FILE_CONTENTS = [TEST_FILENAME1, TEST_FILENAME2]
# File paths
# Every fixture is looked up in apps/common/tests/contrib under
# settings.BASE_DIR.
TEST_FILE3_PATH = os.path.join(
settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib', TEST_FILENAME3
)
TEST_TAR_BZ2_FILE_PATH = os.path.join(
settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib', TEST_TAR_BZ2_FILE
)
TEST_TAR_FILE_PATH = os.path.join(
settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib', TEST_TAR_FILE
)
TEST_TAR_GZ_FILE_PATH = os.path.join(
settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib', TEST_TAR_GZ_FILE
)
TEST_ZIP_FILE_PATH = os.path.join(
settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib', TEST_ZIP_FILE
)

View File

@@ -0,0 +1,70 @@
from __future__ import unicode_literals
from common.tests import BaseTestCase
from django.test import override_settings
from ..compressed_files import Archive, TarArchive, ZipArchive
from .literals import (
TEST_COMPRESSED_FILE_CONTENTS, TEST_FILE_CONTENTS_1, TEST_FILE_CONTENTS_2,
TEST_FILE3_PATH, TEST_FILENAME1, TEST_FILENAME2, TEST_FILENAME3,
TEST_TAR_BZ2_FILE_PATH, TEST_TAR_FILE_PATH, TEST_TAR_GZ_FILE_PATH,
TEST_ZIP_FILE_PATH
)
class TarArchiveClassTestCase(BaseTestCase):
    """
    Exercise the archive API against the uncompressed tar fixture.

    Subclasses rerun the same tests for other formats by overriding
    archive_path and cls.
    """
    archive_path = TEST_TAR_FILE_PATH
    cls = TarArchive
    filename = TEST_FILENAME3
    file_path = TEST_FILE3_PATH
    members_list = TEST_COMPRESSED_FILE_CONTENTS
    member_name = TEST_FILENAME1
    member_contents = TEST_FILE_CONTENTS_1

    def test_add_file(self):
        """
        A file added to a new archive must be listed as its only member.
        """
        archive = self.cls()
        archive.create()
        # Archives are binary containers, read the fixture as bytes.
        with open(self.file_path, mode='rb') as file_object:
            archive.add_file(file_object=file_object, filename=self.filename)
            # assertEqual, not assertTrue: assertTrue takes its second
            # argument as the failure message and passes for any non
            # empty list, which made this check always succeed.
            self.assertEqual(archive.members(), [self.filename])

    def test_open(self):
        """
        Archive.open() must pick the class registered for the fixture.
        """
        with open(self.archive_path, mode='rb') as file_object:
            archive = Archive.open(file_object=file_object)
            self.assertIsInstance(archive, self.cls)

    def test_members(self):
        """
        The member list must match the known content of the fixture.
        """
        with open(self.archive_path, mode='rb') as file_object:
            archive = Archive.open(file_object=file_object)
            self.assertEqual(archive.members(), self.members_list)

    def test_member_contents(self):
        """
        Reading a member by name must return its stored content.
        """
        with open(self.archive_path, mode='rb') as file_object:
            archive = Archive.open(file_object=file_object)
            self.assertEqual(
                archive.member_contents(filename=self.member_name),
                self.member_contents
            )

    def test_open_member(self):
        """
        A member opened as a file-like object must yield its content.
        """
        with open(self.archive_path, mode='rb') as file_object:
            archive = Archive.open(file_object=file_object)
            # Separate name so the archive file handle is not rebound
            # inside its own context manager.
            member_file = archive.open_member(filename=self.member_name)
            self.assertEqual(member_file.read(), self.member_contents)
class ZipArchiveClassTestCase(TarArchiveClassTestCase):
    """
    Run the inherited archive tests against the ZIP fixture.
    """
    cls = ZipArchive
    archive_path = TEST_ZIP_FILE_PATH
class TarGzArchiveClassTestCase(TarArchiveClassTestCase):
    """
    Run the inherited archive tests against the .tar.gz fixture.
    """
    cls = TarArchive
    archive_path = TEST_TAR_GZ_FILE_PATH
class TarBz2ArchiveClassTestCase(TarArchiveClassTestCase):
    """
    Run the inherited archive tests against the .tar.bz2 fixture.
    """
    cls = TarArchive
    archive_path = TEST_TAR_BZ2_FILE_PATH

View File

@@ -11,7 +11,7 @@ from django.utils.http import urlencode
from django.utils.translation import ugettext_lazy as _, ungettext
from acls.models import AccessControlList
from common.compressed_files import CompressedFile
from common.compressed_files import ZipArchive
from common.exceptions import ActionError
from common.generics import (
ConfirmView, FormView, MultipleObjectConfirmActionView,
@@ -553,22 +553,22 @@ class DocumentDownloadView(SingleObjectDownloadView):
)
if self.request.GET.get('compressed') == 'True' or queryset.count() > 1:
compressed_file = CompressedFile()
compressed_file = ZipArchive()
compressed_file.create()
for item in queryset:
descriptor = DocumentDownloadView.get_item_file(item=item)
compressed_file.add_file(
descriptor, arcname=self.get_item_label(item=item)
)
descriptor.close()
DocumentDownloadView.commit_event(
item=item, request=self.request
)
with DocumentDownloadView.get_item_file(item=item) as file_object:
compressed_file.add_file(
file_object=file_object,
filename=self.get_item_label(item=item)
)
DocumentDownloadView.commit_event(
item=item, request=self.request
)
compressed_file.close()
return DocumentDownloadView.VirtualFile(
compressed_file.as_file(zip_filename),
name=zip_filename
compressed_file.as_file(zip_filename), name=zip_filename
)
else:
item = queryset.first()

View File

@@ -21,7 +21,8 @@ from django.utils.translation import ugettext_lazy as _
from model_utils.managers import InheritanceManager
from common.compressed_files import CompressedFile, NotACompressedFile
from common.compressed_files import Archive
from common.exceptions import NoMIMETypeMatch
from common.utils import TemporaryFile
from converter.models import Transformation
from djcelery.models import PeriodicTask, IntervalSchedule
@@ -88,24 +89,24 @@ class Source(models.Model):
kwargs = {
'description': description, 'document_type': document_type,
'label': label, 'language': language,
'user': user
'label': label, 'language': language, 'user': user
}
if expand:
try:
compressed_file = CompressedFile(file_object)
for compressed_file_child in compressed_file.children():
kwargs.update({'label': force_text(compressed_file_child)})
documents.append(
self.upload_document(
file_object=File(compressed_file_child), **kwargs
compressed_file = Archive.open(file_object=file_object)
for compressed_file_child in compressed_file.members():
with compressed_file.open_member(filename=compressed_file_child) as file_object:
kwargs.update(
{'label': force_text(compressed_file_child)}
)
)
compressed_file_child.close()
except NotACompressedFile:
logging.debug('Exception: NotACompressedFile')
documents.append(
self.upload_document(
file_object=file_object, **kwargs
)
)
except NoMIMETypeMatch:
logging.debug('Exception: NoMIMETypeMatch')
documents.append(
self.upload_document(file_object=file_object, **kwargs)
)

View File

@@ -9,7 +9,8 @@ from django.utils.translation import ugettext_lazy as _
from mayan.celery import app
from common.compressed_files import CompressedFile, NotACompressedFile
from common.compressed_files import Archive
from common.exceptions import NoMIMETypeMatch
from lock_manager import LockError
from lock_manager.runtime import locking_backend
@@ -138,7 +139,7 @@ def task_source_handle_upload(self, document_type_id, shared_uploaded_file_id, s
with shared_upload.open() as file_object:
if expand:
try:
compressed_file = CompressedFile(file_object)
compressed_file = Archive.open(file_object=file_object)
for compressed_file_child in compressed_file.children():
# TODO: find way to uniquely identify child files
# Use filename in the mean time.
@@ -187,8 +188,8 @@ def task_source_handle_upload(self, document_type_id, shared_uploaded_file_id, s
'upload file: %s; %s. Retrying.', shared_upload,
exception
)
except NotACompressedFile:
logging.debug('Exception: NotACompressedFile')
except NoMIMETypeMatch:
logging.debug('Exception: NoMIMETypeMatch')
task_upload_document.delay(
shared_uploaded_file_id=shared_upload.pk, **kwargs
)