From e5bb00d51418549f52ae0f4bfbc19bbc061876bb Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 24 May 2017 19:50:14 -0400 Subject: [PATCH] Finish refactor of the new compressed file class support. Closes GitLab issue #7. Signed-off-by: Roberto Rosario --- HISTORY.rst | 2 + docs/releases/3.1.rst | 1 + mayan/apps/common/compressed_files.py | 199 +++++++++++------- mayan/apps/common/exceptions.py | 13 ++ mayan/apps/common/tests/contrib/test_file.tar | Bin 0 -> 3072 bytes .../common/tests/contrib/test_file.tar.bz2 | Bin 0 -> 190 bytes .../common/tests/contrib/test_file.tar.gz | Bin 0 -> 160 bytes mayan/apps/common/tests/contrib/test_file.zip | Bin 0 -> 358 bytes .../apps/common/tests/contrib/test_file1.txt | 1 + .../apps/common/tests/contrib/test_file2.txt | 1 + .../apps/common/tests/contrib/test_file3.txt | 1 + mayan/apps/common/tests/literals.py | 33 +++ .../common/tests/test_compressed_files.py | 70 ++++++ mayan/apps/documents/views/document_views.py | 24 +-- mayan/apps/sources/models.py | 29 +-- mayan/apps/sources/tasks.py | 9 +- 16 files changed, 282 insertions(+), 101 deletions(-) create mode 100644 mayan/apps/common/tests/contrib/test_file.tar create mode 100644 mayan/apps/common/tests/contrib/test_file.tar.bz2 create mode 100644 mayan/apps/common/tests/contrib/test_file.tar.gz create mode 100644 mayan/apps/common/tests/contrib/test_file.zip create mode 100644 mayan/apps/common/tests/contrib/test_file1.txt create mode 100644 mayan/apps/common/tests/contrib/test_file2.txt create mode 100644 mayan/apps/common/tests/contrib/test_file3.txt create mode 100644 mayan/apps/common/tests/test_compressed_files.py diff --git a/HISTORY.rst b/HISTORY.rst index f7682a9e4d..148435acb5 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -127,6 +127,8 @@ - Remove usage of pace.js. Would cause XMLRequest to fallback to synchronous mode. - Add custom AJAX spinner. +- Complete refactor of the compress archive class support. Closes + GitLab issue #7. 3.0.3 (2018-08-17) ================== diff --git a/docs/releases/3.1.rst b/docs/releases/3.1.rst index abc67098ce..00b37a8801 100644 --- a/docs/releases/3.1.rst +++ b/docs/releases/3.1.rst @@ -424,6 +424,7 @@ must be changed:: Bugs fixed or issues closed =========================== +* `GitLab issue #7 `_ Feature: other compressors than zip for compressed documents * `GitLab issue #259 `_ Thumbnails: why are they created on the fly (therefore: not cached) .. _PyPI: https://pypi.python.org/pypi/mayan-edms/ diff --git a/mayan/apps/common/compressed_files.py b/mayan/apps/common/compressed_files.py index 4898a9f5ac..ff21f8984e 100644 --- a/mayan/apps/common/compressed_files.py +++ b/mayan/apps/common/compressed_files.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from io import BytesIO +import tarfile import zipfile try: @@ -11,98 +12,154 @@ except ImportError: from django.core.files.uploadedfile import SimpleUploadedFile +from mimetype.api import get_mimetype -class NotACompressedFile(Exception): - pass +from .exceptions import NoMIMETypeMatch -class CompressedFile(object): - def __init__(self, file_input=None): - if file_input: - try: - # Is it a file like object? - file_input.seek(0) - except AttributeError: - # If not, try open it. - self._open(file_input) - else: - self.file_object = file_input - else: - self._create() +class Archive(object): + _registry = {} - def _create(self): - self.descriptor = BytesIO() - self.zf = zipfile.ZipFile(self.descriptor, mode='w') + @classmethod + def register(cls, mime_types, archive_classes): + for mime_type in mime_types: + for archive_class in archive_classes: + cls._registry.setdefault( + mime_type, [] + ).append(archive_class) - def _open(self, file_input): - try: - # Is it a file like object? - file_input.seek(0) - except AttributeError: - # If not, try open it. - self.descriptor = open(file_input, 'r+b') - else: - self.descriptor = file_input + @classmethod + def open(cls, file_object): + mime_type = get_mimetype( + file_object=file_object, mimetype_only=True + )[0] try: - test = zipfile.ZipFile(self.descriptor, mode='r') - except zipfile.BadZipfile: - raise NotACompressedFile - else: - test.close() - self.descriptor.seek(0) - self.zf = zipfile.ZipFile(self.descriptor, mode='a') + for archive_class in cls._registry[mime_type]: + instance = archive_class() + instance._open(file_object=file_object) + return instance + except KeyError: + raise NoMIMETypeMatch - def add_file(self, file_input, arcname=None): - try: - # Is it a file like object? - file_input.seek(0) - except AttributeError: - # If not, keep it - self.zf.write( - file_input, arcname=arcname, compress_type=COMPRESSION - ) - else: - self.zf.writestr(arcname, file_input.read()) + def _open(self, file_object): + raise NotImplementedError - def contents(self): + def add_file(self, file_object, filename): + """ + Add a file as a member of an archive + """ + raise NotImplementedError + + def close(self): + self._archive.close() + + def create(self): + """ + Create an empty archive + """ + raise NotImplementedError + + def get_members(self): + return ( + SimpleUploadedFile( + name=filename, content=self.member_contents(filename) + ) for filename in self.members() + ) + + def member_contents(self, filename): + """ + Return the content of a member + """ + raise NotImplementedError + + def members(self): + """ + Return a list of all the elements inside the archive + """ + raise NotImplementedError + + def open_member(self, filename): + """ + Return a file-like object to a member of the archive + """ + raise NotImplemented + + +class TarArchive(Archive): + def _open(self, file_object): + self._archive = tarfile.open(fileobj=file_object) + + def add_file(self, file_object, filename): + self._archive.addfile( + tarfile.TarInfo(), fileobj=file_object + ) + + def create(self): + self.string_buffer = BytesIO() + self._archive = tarfile.TarFile(fileobj=self.string_buffer, mode='w') + + def member_contents(self, filename): + return self._archive.extractfile(filename).read() + + def members(self): + return self._archive.getnames() + + def open_member(self, filename): + return self._archive.extractfile(filename) + + +class ZipArchive(Archive): + def _open(self, file_object): + self._archive = zipfile.ZipFile(file_object) + + def add_file(self, file_object, filename): + self._archive.writestr( + zinfo_or_arcname=filename, bytes=file_object.read(), + compress_type=COMPRESSION + ) + + def create(self): + self.string_buffer = BytesIO() + self._archive = zipfile.ZipFile(self.string_buffer, mode='w') + + def member_contents(self, filename): + return self._archive.read(filename) + + def members(self): return [ - filename for filename in self.zf.namelist() if not filename.endswith('/') + filename for filename in self._archive.namelist() if not filename.endswith('/') ] - def get_content(self, filename): - return self.zf.read(filename) + def open_member(self, filename): + return self._archive.open(filename) def write(self, filename=None): # fix for Linux zip files read in Windows - for file in self.zf.filelist: - file.create_system = 0 + for entry in self._archive.filelist: + entry.create_system = 0 - self.descriptor.seek(0) + self.string_buffer.seek(0) if filename: - descriptor = open(filename, 'w') - descriptor.write(self.descriptor.read()) + with open(filename, 'w') as file_object: + file_object.write(self.string_buffer.read()) else: - return self.descriptor + return self.string_buffer def as_file(self, filename): return SimpleUploadedFile(name=filename, content=self.write().read()) - def children(self): - try: - # Try for a ZIP file - zfobj = zipfile.ZipFile(self.file_object) - filenames = [ - filename for filename in zfobj.namelist() if not filename.endswith('/') - ] - return ( - SimpleUploadedFile( - name=filename, content=zfobj.read(filename) - ) for filename in filenames - ) - except zipfile.BadZipfile: - raise NotACompressedFile - def close(self): - self.zf.close() +Archive.register( + mime_types=('application/zip',), archive_classes=(ZipArchive,) +) +Archive.register( + mime_types=('application/x-tar',), archive_classes=(TarArchive,) +) +Archive.register( + mime_types=('application/gzip',), archive_classes=(TarArchive,) +) +Archive.register( + mime_types=('application/x-bzip2',), archive_classes=(TarArchive,) +) diff --git a/mayan/apps/common/exceptions.py b/mayan/apps/common/exceptions.py index cd205c87b8..8322d46e48 100644 --- a/mayan/apps/common/exceptions.py +++ b/mayan/apps/common/exceptions.py @@ -15,6 +15,19 @@ class ActionError(BaseCommonException): it is used to announce that one item in the queryset failed to process. """ +class CompressionFileError(BaseCommonException): + """ + Base exception for file decompression class + """ + pass + + +class NoMIMETypeMatch(CompressionFileError): + """ + There is no decompressor registered for the specified MIME type + """ + pass + class NotLatestVersion(BaseCommonException): """ diff --git a/mayan/apps/common/tests/contrib/test_file.tar b/mayan/apps/common/tests/contrib/test_file.tar new file mode 100644 index 0000000000000000000000000000000000000000..d096e59a8ed49a69646b4f69a2c5e0a21b7b7ffd GIT binary patch literal 3072 zcmXR(EiQ>q%gjkN)GMhdVW0^xFfcGPF;QSJFfcSXH9)3;G9X}RqF`ujXlQ6`W@u<; zs9<1dYGPo@pkP1~*Pt0%T3nJ?1aw+aesN+^W_}TxG@MIF9^w;_=RiOKu5XYtLR^DG z6x=+0Tonwt2Dwv5okxQZD1qgFqrs8?jevOvoc|3*^FIxu8^cIM{x=%U{}`b&Dli%X MqaiRF0>d{10MrsZRsaA1 literal 0 HcmV?d00001 diff --git a/mayan/apps/common/tests/contrib/test_file.tar.bz2 b/mayan/apps/common/tests/contrib/test_file.tar.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..c563c61a1bfbd596f3e8bd8adc6515f9a5da1f03 GIT binary patch literal 190 zcmV;v073skT4**gL0KkKSvaFO1pok;e~{7;1OPw*e}Drd9021ao zGF3~sSvjkQ)$tgUgrd@ojFRS(sbHael;hWv$yJnbUZj=2u)!_N7yA~Cs;Fkki<>u_ sgQLTu$!K>`^z8XNqD>7E)yY%g`taUsuk%-RYSjIjEqtxFoR%=#--T;>4oN{32ZH2+2cyq5!9d z2ncZv4pDIP^l?=%8XWoG2$*-k`QKnP|I-5?^1sn&{vQRSU=)mk O5d;8+PdKRn3;+Ntb3i2k literal 0 HcmV?d00001 diff --git a/mayan/apps/common/tests/contrib/test_file.zip b/mayan/apps/common/tests/contrib/test_file.zip new file mode 100644 index 0000000000000000000000000000000000000000..20a5bf2caa8c8ac98dd5a055e8d3f6b773854e93 GIT binary patch literal 358 zcmWIWW@h1H0D=^5*otEz+Cu9H4=nN zE4UdLSza(RFo1~=*WeHZH%}i|1w*a?xS0(=Gk-4ln1j_!Be 1: - compressed_file = CompressedFile() + compressed_file = ZipArchive() + compressed_file.create() for item in queryset: - descriptor = DocumentDownloadView.get_item_file(item=item) - compressed_file.add_file( - descriptor, arcname=self.get_item_label(item=item) - ) - descriptor.close() - DocumentDownloadView.commit_event( - item=item, request=self.request - ) + with DocumentDownloadView.get_item_file(item=item) as file_object: + compressed_file.add_file( + file_object=file_object, + filename=self.get_item_label(item=item) + ) + DocumentDownloadView.commit_event( + item=item, request=self.request + ) compressed_file.close() return DocumentDownloadView.VirtualFile( - compressed_file.as_file(zip_filename), - name=zip_filename + compressed_file.as_file(zip_filename), name=zip_filename ) else: item = queryset.first() diff --git a/mayan/apps/sources/models.py b/mayan/apps/sources/models.py index 60878a0f04..9dc418537f 100644 --- a/mayan/apps/sources/models.py +++ b/mayan/apps/sources/models.py @@ -21,7 +21,8 @@ from django.utils.translation import ugettext_lazy as _ from model_utils.managers import InheritanceManager -from common.compressed_files import CompressedFile, NotACompressedFile +from common.compressed_files import Archive +from common.exceptions import NoMIMETypeMatch from common.utils import TemporaryFile from converter.models import Transformation from djcelery.models import PeriodicTask, IntervalSchedule @@ -88,24 +89,24 @@ class Source(models.Model): kwargs = { 'description': description, 'document_type': document_type, - 'label': label, 'language': language, - 'user': user + 'label': label, 'language': language, 'user': user } if expand: try: - compressed_file = CompressedFile(file_object) - for compressed_file_child in compressed_file.children(): - kwargs.update({'label': force_text(compressed_file_child)}) - documents.append( - self.upload_document( - file_object=File(compressed_file_child), **kwargs + compressed_file = Archive.open(file_object=file_object) + for compressed_file_child in compressed_file.members(): + with compressed_file.open_member(filename=compressed_file_child) as file_object: + kwargs.update( + {'label': force_text(compressed_file_child)} ) - ) - compressed_file_child.close() - - except NotACompressedFile: - logging.debug('Exception: NotACompressedFile') + documents.append( + self.upload_document( + file_object=file_object, **kwargs + ) + ) + except NoMIMETypeMatch: + logging.debug('Exception: NoMIMETypeMatch') documents.append( self.upload_document(file_object=file_object, **kwargs) ) diff --git a/mayan/apps/sources/tasks.py b/mayan/apps/sources/tasks.py index 2bd413090c..6237bac1ab 100644 --- a/mayan/apps/sources/tasks.py +++ b/mayan/apps/sources/tasks.py @@ -9,7 +9,8 @@ from django.utils.translation import ugettext_lazy as _ from mayan.celery import app -from common.compressed_files import CompressedFile, NotACompressedFile +from common.compressed_files import Archive +from common.exceptions import NoMIMETypeMatch from lock_manager import LockError from lock_manager.runtime import locking_backend @@ -138,7 +139,7 @@ def task_source_handle_upload(self, document_type_id, shared_uploaded_file_id, s with shared_upload.open() as file_object: if expand: try: - compressed_file = CompressedFile(file_object) + compressed_file = Archive.open(file_object=file_object) for compressed_file_child in compressed_file.children(): # TODO: find way to uniquely identify child files # Use filename in the mean time. @@ -187,8 +188,8 @@ def task_source_handle_upload(self, document_type_id, shared_uploaded_file_id, s 'upload file: %s; %s. Retrying.', shared_upload, exception ) - except NotACompressedFile: - logging.debug('Exception: NotACompressedFile') + except NoMIMETypeMatch: + logging.debug('Exception: NoMIMETypeMatch') task_upload_document.delay( shared_uploaded_file_id=shared_upload.pk, **kwargs )