diff --git a/HISTORY.rst b/HISTORY.rst index f7682a9e4d..148435acb5 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -127,6 +127,8 @@ - Remove usage of pace.js. Would cause XMLRequest to fallback to synchronous mode. - Add custom AJAX spinner. +- Complete refactor of the compress archive class support. Closes + GitLab issue #7. 3.0.3 (2018-08-17) ================== diff --git a/docs/releases/3.1.rst b/docs/releases/3.1.rst index abc67098ce..00b37a8801 100644 --- a/docs/releases/3.1.rst +++ b/docs/releases/3.1.rst @@ -424,6 +424,7 @@ must be changed:: Bugs fixed or issues closed =========================== +* `GitLab issue #7 `_ Feature: other compressors than zip for compressed documents * `GitLab issue #259 `_ Thumbnails: why are they created on the fly (therefore: not cached) .. _PyPI: https://pypi.python.org/pypi/mayan-edms/ diff --git a/mayan/apps/common/compressed_files.py b/mayan/apps/common/compressed_files.py index 4898a9f5ac..ff21f8984e 100644 --- a/mayan/apps/common/compressed_files.py +++ b/mayan/apps/common/compressed_files.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from io import BytesIO +import tarfile import zipfile try: @@ -11,98 +12,154 @@ except ImportError: from django.core.files.uploadedfile import SimpleUploadedFile +from mimetype.api import get_mimetype -class NotACompressedFile(Exception): - pass +from .exceptions import NoMIMETypeMatch -class CompressedFile(object): - def __init__(self, file_input=None): - if file_input: - try: - # Is it a file like object? - file_input.seek(0) - except AttributeError: - # If not, try open it. - self._open(file_input) - else: - self.file_object = file_input - else: - self._create() +class Archive(object): + _registry = {} - def _create(self): - self.descriptor = BytesIO() - self.zf = zipfile.ZipFile(self.descriptor, mode='w') + @classmethod + def register(cls, mime_types, archive_classes): + for mime_type in mime_types: + for archive_class in archive_classes: + cls._registry.setdefault( + mime_type, [] + ).append(archive_class) - def _open(self, file_input): - try: - # Is it a file like object? - file_input.seek(0) - except AttributeError: - # If not, try open it. - self.descriptor = open(file_input, 'r+b') - else: - self.descriptor = file_input + @classmethod + def open(cls, file_object): + mime_type = get_mimetype( + file_object=file_object, mimetype_only=True + )[0] try: - test = zipfile.ZipFile(self.descriptor, mode='r') - except zipfile.BadZipfile: - raise NotACompressedFile - else: - test.close() - self.descriptor.seek(0) - self.zf = zipfile.ZipFile(self.descriptor, mode='a') + for archive_class in cls._registry[mime_type]: + instance = archive_class() + instance._open(file_object=file_object) + return instance + except KeyError: + raise NoMIMETypeMatch - def add_file(self, file_input, arcname=None): - try: - # Is it a file like object? - file_input.seek(0) - except AttributeError: - # If not, keep it - self.zf.write( - file_input, arcname=arcname, compress_type=COMPRESSION - ) - else: - self.zf.writestr(arcname, file_input.read()) + def _open(self, file_object): + raise NotImplementedError - def contents(self): + def add_file(self, file_object, filename): + """ + Add a file as a member of an archive + """ + raise NotImplementedError + + def close(self): + self._archive.close() + + def create(self): + """ + Create an empty archive + """ + raise NotImplementedError + + def get_members(self): + return ( + SimpleUploadedFile( + name=filename, content=self.member_contents(filename) + ) for filename in self.members() + ) + + def member_contents(self, filename): + """ + Return the content of a member + """ + raise NotImplementedError + + def members(self): + """ + Return a list of all the elements inside the archive + """ + raise NotImplementedError + + def open_member(self, filename): + """ + Return a file-like object to a member of the archive + """ + raise NotImplemented + + +class TarArchive(Archive): + def _open(self, file_object): + self._archive = tarfile.open(fileobj=file_object) + + def add_file(self, file_object, filename): + self._archive.addfile( + tarfile.TarInfo(), fileobj=file_object + ) + + def create(self): + self.string_buffer = BytesIO() + self._archive = tarfile.TarFile(fileobj=self.string_buffer, mode='w') + + def member_contents(self, filename): + return self._archive.extractfile(filename).read() + + def members(self): + return self._archive.getnames() + + def open_member(self, filename): + return self._archive.extractfile(filename) + + +class ZipArchive(Archive): + def _open(self, file_object): + self._archive = zipfile.ZipFile(file_object) + + def add_file(self, file_object, filename): + self._archive.writestr( + zinfo_or_arcname=filename, bytes=file_object.read(), + compress_type=COMPRESSION + ) + + def create(self): + self.string_buffer = BytesIO() + self._archive = zipfile.ZipFile(self.string_buffer, mode='w') + + def member_contents(self, filename): + return self._archive.read(filename) + + def members(self): return [ - filename for filename in self.zf.namelist() if not filename.endswith('/') + filename for filename in self._archive.namelist() if not filename.endswith('/') ] - def get_content(self, filename): - return self.zf.read(filename) + def open_member(self, filename): + return self._archive.open(filename) def write(self, filename=None): # fix for Linux zip files read in Windows - for file in self.zf.filelist: - file.create_system = 0 + for entry in self._archive.filelist: + entry.create_system = 0 - self.descriptor.seek(0) + self.string_buffer.seek(0) if filename: - descriptor = open(filename, 'w') - descriptor.write(self.descriptor.read()) + with open(filename, 'w') as file_object: + file_object.write(self.string_buffer.read()) else: - return self.descriptor + return self.string_buffer def as_file(self, filename): return SimpleUploadedFile(name=filename, content=self.write().read()) - def children(self): - try: - # Try for a ZIP file - zfobj = zipfile.ZipFile(self.file_object) - filenames = [ - filename for filename in zfobj.namelist() if not filename.endswith('/') - ] - return ( - SimpleUploadedFile( - name=filename, content=zfobj.read(filename) - ) for filename in filenames - ) - except zipfile.BadZipfile: - raise NotACompressedFile - def close(self): - self.zf.close() +Archive.register( + mime_types=('application/zip',), archive_classes=(ZipArchive,) +) +Archive.register( + mime_types=('application/x-tar',), archive_classes=(TarArchive,) +) +Archive.register( + mime_types=('application/gzip',), archive_classes=(TarArchive,) +) +Archive.register( + mime_types=('application/x-bzip2',), archive_classes=(TarArchive,) +) diff --git a/mayan/apps/common/exceptions.py b/mayan/apps/common/exceptions.py index cd205c87b8..8322d46e48 100644 --- a/mayan/apps/common/exceptions.py +++ b/mayan/apps/common/exceptions.py @@ -15,6 +15,19 @@ class ActionError(BaseCommonException): it is used to announce that one item in the queryset failed to process. """ +class CompressionFileError(BaseCommonException): + """ + Base exception for file decompression class + """ + pass + + +class NoMIMETypeMatch(CompressionFileError): + """ + There is no decompressor registered for the specified MIME type + """ + pass + class NotLatestVersion(BaseCommonException): """ diff --git a/mayan/apps/common/tests/contrib/test_file.tar b/mayan/apps/common/tests/contrib/test_file.tar new file mode 100644 index 0000000000..d096e59a8e Binary files /dev/null and b/mayan/apps/common/tests/contrib/test_file.tar differ diff --git a/mayan/apps/common/tests/contrib/test_file.tar.bz2 b/mayan/apps/common/tests/contrib/test_file.tar.bz2 new file mode 100644 index 0000000000..c563c61a1b Binary files /dev/null and b/mayan/apps/common/tests/contrib/test_file.tar.bz2 differ diff --git a/mayan/apps/common/tests/contrib/test_file.tar.gz b/mayan/apps/common/tests/contrib/test_file.tar.gz new file mode 100644 index 0000000000..08c111eda3 Binary files /dev/null and b/mayan/apps/common/tests/contrib/test_file.tar.gz differ diff --git a/mayan/apps/common/tests/contrib/test_file.zip b/mayan/apps/common/tests/contrib/test_file.zip new file mode 100644 index 0000000000..20a5bf2caa Binary files /dev/null and b/mayan/apps/common/tests/contrib/test_file.zip differ diff --git a/mayan/apps/common/tests/contrib/test_file1.txt b/mayan/apps/common/tests/contrib/test_file1.txt new file mode 100644 index 0000000000..650056ae0a --- /dev/null +++ b/mayan/apps/common/tests/contrib/test_file1.txt @@ -0,0 +1 @@ +TEST FILE 1 diff --git a/mayan/apps/common/tests/contrib/test_file2.txt b/mayan/apps/common/tests/contrib/test_file2.txt new file mode 100644 index 0000000000..997c91f492 --- /dev/null +++ b/mayan/apps/common/tests/contrib/test_file2.txt @@ -0,0 +1 @@ +TEST FILE 2 diff --git a/mayan/apps/common/tests/contrib/test_file3.txt b/mayan/apps/common/tests/contrib/test_file3.txt new file mode 100644 index 0000000000..4fec0332d5 --- /dev/null +++ b/mayan/apps/common/tests/contrib/test_file3.txt @@ -0,0 +1 @@ +TEST FILE 3 diff --git a/mayan/apps/common/tests/literals.py b/mayan/apps/common/tests/literals.py index b3e3bb2ef5..be5c8e68cd 100644 --- a/mayan/apps/common/tests/literals.py +++ b/mayan/apps/common/tests/literals.py @@ -1,5 +1,38 @@ from __future__ import unicode_literals +import os + +from django.conf import settings + TEST_ERROR_LOG_ENTRY_RESULT = 'test_error_log_entry_result_text' TEST_VIEW_NAME = 'test view name' TEST_VIEW_URL = 'test-view-url' + +# Filenames +TEST_FILENAME1 = 'test_file1.txt' +TEST_FILENAME2 = 'test_file2.txt' +TEST_FILENAME3 = 'test_file3.txt' +TEST_FILE_CONTENTS_1 = 'TEST FILE 1\n' +TEST_FILE_CONTENTS_2 = 'TEST FILE 2\n' +TEST_TAR_BZ2_FILE = 'test_file.tar.bz2' +TEST_TAR_FILE = 'test_file.tar' +TEST_TAR_GZ_FILE = 'test_file.tar.gz' +TEST_ZIP_FILE = 'test_file.zip' +TEST_COMPRESSED_FILE_CONTENTS = [TEST_FILENAME1, TEST_FILENAME2] + +# File paths +TEST_FILE3_PATH = os.path.join( + settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib', TEST_FILENAME3 +) +TEST_TAR_BZ2_FILE_PATH = os.path.join( + settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib', TEST_TAR_BZ2_FILE +) +TEST_TAR_FILE_PATH = os.path.join( + settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib', TEST_TAR_FILE +) +TEST_TAR_GZ_FILE_PATH = os.path.join( + settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib', TEST_TAR_GZ_FILE +) +TEST_ZIP_FILE_PATH = os.path.join( + settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib', TEST_ZIP_FILE +) diff --git a/mayan/apps/common/tests/test_compressed_files.py b/mayan/apps/common/tests/test_compressed_files.py new file mode 100644 index 0000000000..83f17fe3e0 --- /dev/null +++ b/mayan/apps/common/tests/test_compressed_files.py @@ -0,0 +1,70 @@ +from __future__ import unicode_literals + +from common.tests import BaseTestCase +from django.test import override_settings + +from ..compressed_files import Archive, TarArchive, ZipArchive + +from .literals import ( + TEST_COMPRESSED_FILE_CONTENTS, TEST_FILE_CONTENTS_1, TEST_FILE_CONTENTS_2, + TEST_FILE3_PATH, TEST_FILENAME1, TEST_FILENAME2, TEST_FILENAME3, + TEST_TAR_BZ2_FILE_PATH, TEST_TAR_FILE_PATH, TEST_TAR_GZ_FILE_PATH, + TEST_ZIP_FILE_PATH +) + + +class TarArchiveClassTestCase(BaseTestCase): + archive_path = TEST_TAR_FILE_PATH + cls = TarArchive + filename = TEST_FILENAME3 + file_path = TEST_FILE3_PATH + members_list = TEST_COMPRESSED_FILE_CONTENTS + member_name = TEST_FILENAME1 + member_contents = TEST_FILE_CONTENTS_1 + + def test_add_file(self): + archive = self.cls() + archive.create() + with open(self.file_path) as file_object: + archive.add_file(file_object=file_object, filename=self.filename) + self.assertTrue(archive.members(), [self.filename]) + + def test_open(self): + with open(self.archive_path) as file_object: + archive = Archive.open(file_object=file_object) + self.assertTrue(isinstance(archive, self.cls)) + + def test_members(self): + with open(self.archive_path) as file_object: + archive = Archive.open(file_object=file_object) + self.assertEqual(archive.members(), self.members_list) + + def test_member_contents(self): + with open(self.archive_path) as file_object: + archive = Archive.open(file_object=file_object) + self.assertEqual( + archive.member_contents(filename=self.member_name), self.member_contents + ) + + def test_open_member(self): + with open(self.archive_path) as file_object: + archive = Archive.open(file_object=file_object) + file_object = archive.open_member(filename=self.member_name) + self.assertEqual( + file_object.read(), self.member_contents + ) + + +class ZipArchiveClassTestCase(TarArchiveClassTestCase): + archive_path = TEST_ZIP_FILE_PATH + cls = ZipArchive + + +class TarGzArchiveClassTestCase(TarArchiveClassTestCase): + archive_path = TEST_TAR_GZ_FILE_PATH + cls = TarArchive + + +class TarBz2ArchiveClassTestCase(TarArchiveClassTestCase): + archive_path = TEST_TAR_BZ2_FILE_PATH + cls = TarArchive diff --git a/mayan/apps/documents/views/document_views.py b/mayan/apps/documents/views/document_views.py index eafcf9a771..8f51b145bb 100644 --- a/mayan/apps/documents/views/document_views.py +++ b/mayan/apps/documents/views/document_views.py @@ -11,7 +11,7 @@ from django.utils.http import urlencode from django.utils.translation import ugettext_lazy as _, ungettext from acls.models import AccessControlList -from common.compressed_files import CompressedFile +from common.compressed_files import ZipArchive from common.exceptions import ActionError from common.generics import ( ConfirmView, FormView, MultipleObjectConfirmActionView, @@ -553,22 +553,22 @@ class DocumentDownloadView(SingleObjectDownloadView): ) if self.request.GET.get('compressed') == 'True' or queryset.count() > 1: - compressed_file = CompressedFile() + compressed_file = ZipArchive() + compressed_file.create() for item in queryset: - descriptor = DocumentDownloadView.get_item_file(item=item) - compressed_file.add_file( - descriptor, arcname=self.get_item_label(item=item) - ) - descriptor.close() - DocumentDownloadView.commit_event( - item=item, request=self.request - ) + with DocumentDownloadView.get_item_file(item=item) as file_object: + compressed_file.add_file( + file_object=file_object, + filename=self.get_item_label(item=item) + ) + DocumentDownloadView.commit_event( + item=item, request=self.request + ) compressed_file.close() return DocumentDownloadView.VirtualFile( - compressed_file.as_file(zip_filename), - name=zip_filename + compressed_file.as_file(zip_filename), name=zip_filename ) else: item = queryset.first() diff --git a/mayan/apps/sources/models.py b/mayan/apps/sources/models.py index 60878a0f04..9dc418537f 100644 --- a/mayan/apps/sources/models.py +++ b/mayan/apps/sources/models.py @@ -21,7 +21,8 @@ from django.utils.translation import ugettext_lazy as _ from model_utils.managers import InheritanceManager -from common.compressed_files import CompressedFile, NotACompressedFile +from common.compressed_files import Archive +from common.exceptions import NoMIMETypeMatch from common.utils import TemporaryFile from converter.models import Transformation from djcelery.models import PeriodicTask, IntervalSchedule @@ -88,24 +89,24 @@ class Source(models.Model): kwargs = { 'description': description, 'document_type': document_type, - 'label': label, 'language': language, - 'user': user + 'label': label, 'language': language, 'user': user } if expand: try: - compressed_file = CompressedFile(file_object) - for compressed_file_child in compressed_file.children(): - kwargs.update({'label': force_text(compressed_file_child)}) - documents.append( - self.upload_document( - file_object=File(compressed_file_child), **kwargs + compressed_file = Archive.open(file_object=file_object) + for compressed_file_child in compressed_file.members(): + with compressed_file.open_member(filename=compressed_file_child) as file_object: + kwargs.update( + {'label': force_text(compressed_file_child)} ) - ) - compressed_file_child.close() - - except NotACompressedFile: - logging.debug('Exception: NotACompressedFile') + documents.append( + self.upload_document( + file_object=file_object, **kwargs + ) + ) + except NoMIMETypeMatch: + logging.debug('Exception: NoMIMETypeMatch') documents.append( self.upload_document(file_object=file_object, **kwargs) ) diff --git a/mayan/apps/sources/tasks.py b/mayan/apps/sources/tasks.py index 2bd413090c..6237bac1ab 100644 --- a/mayan/apps/sources/tasks.py +++ b/mayan/apps/sources/tasks.py @@ -9,7 +9,8 @@ from django.utils.translation import ugettext_lazy as _ from mayan.celery import app -from common.compressed_files import CompressedFile, NotACompressedFile +from common.compressed_files import Archive +from common.exceptions import NoMIMETypeMatch from lock_manager import LockError from lock_manager.runtime import locking_backend @@ -138,7 +139,7 @@ def task_source_handle_upload(self, document_type_id, shared_uploaded_file_id, s with shared_upload.open() as file_object: if expand: try: - compressed_file = CompressedFile(file_object) + compressed_file = Archive.open(file_object=file_object) for compressed_file_child in compressed_file.children(): # TODO: find way to uniquely identify child files # Use filename in the mean time. @@ -187,8 +188,8 @@ def task_source_handle_upload(self, document_type_id, shared_uploaded_file_id, s 'upload file: %s; %s. Retrying.', shared_upload, exception ) - except NotACompressedFile: - logging.debug('Exception: NotACompressedFile') + except NoMIMETypeMatch: + logging.debug('Exception: NoMIMETypeMatch') task_upload_document.delay( shared_uploaded_file_id=shared_upload.pk, **kwargs )