diff --git a/mayan/apps/common/compressed_files.py b/mayan/apps/common/compressed_files.py
index 4bef477028..ba41531662 100644
--- a/mayan/apps/common/compressed_files.py
+++ b/mayan/apps/common/compressed_files.py
@@ -11,6 +11,7 @@ except ImportError:
     COMPRESSION = zipfile.ZIP_STORED
 
 from django.core.files.uploadedfile import SimpleUploadedFile
+from django.utils.encoding import force_text
 
 from mayan.apps.mimetype.api import get_mimetype
 
@@ -136,9 +137,41 @@ class ZipArchive(Archive):
         return self._archive.read(filename)
 
     def members(self):
-        return [
-            filename for filename in self._archive.namelist() if not filename.endswith('/')
-        ]
+        """
+        Return the names of the archive members, skipping the directory
+        entries (names that end with a slash).
+        """
+        results = []
+
+        for filename in self._archive.namelist():
+            # Zip files only support UTF-8 and CP437 encodings.
+            # Attempt to decode CP437 to be able to check if it ends
+            # with a slash.
+            # Future improvement that violates the Zip format:
+            # Add chardet.detect to detect the most likely encoding
+            # if other than CP437.
+            try:
+                filename = filename.decode('CP437')
+                is_unicode = False
+            except AttributeError:
+                # Python 3 text strings have no decode method; the name
+                # is already text and must not be re-encoded.
+                filename = force_text(filename)
+                is_unicode = True
+            except UnicodeEncodeError:
+                # Python 2 text name holding non ASCII characters.
+                is_unicode = True
+
+            if not filename.endswith('/'):
+                # Re-encode in the original encoding.
+                if not is_unicode:
+                    filename = filename.encode(
+                        encoding='CP437', errors='strict'
+                    )
+
+                results.append(filename)
+
+        return results
 
     def open_member(self, filename):
         return self._archive.open(filename)
diff --git a/mayan/apps/common/tests/contrib/test_archvive_with_cp437_member.zip b/mayan/apps/common/tests/contrib/test_archvive_with_cp437_member.zip
new file mode 100644
index 0000000000..7b65bc19a4
Binary files /dev/null and b/mayan/apps/common/tests/contrib/test_archvive_with_cp437_member.zip differ
diff --git a/mayan/apps/common/tests/literals.py b/mayan/apps/common/tests/literals.py
index 3cfb7ca5fd..8645012f5e 100644
--- a/mayan/apps/common/tests/literals.py
+++ b/mayan/apps/common/tests/literals.py
@@ -12,6 +12,7 @@ TEST_VIEW_URL = 'test-view-url'
 
 # Filenames
 TEST_ARCHIVE_ZIP_SPECIAL_CHARACTERS_FILENAME_MEMBER = 'test_archvive_with_special_characters_filename_member.zip'
+TEST_ARCHIVE_ZIP_CP437_MEMBER = 'test_archvive_with_cp437_member.zip'
 TEST_FILENAME1 = 'test_file1.txt'
 TEST_FILENAME2 = 'test_file2.txt'
 TEST_FILENAME3 = 'test_file3.txt'
@@ -28,6 +29,10 @@ TEST_ARCHIVE_ZIP_SPECIAL_CHARACTERS_FILENAME_MEMBER_PATH = os.path.join(
     settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib',
     TEST_ARCHIVE_ZIP_SPECIAL_CHARACTERS_FILENAME_MEMBER
 )
+TEST_ARCHIVE_ZIP_CP437_MEMBER_PATH = os.path.join(
+    settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib',
+    TEST_ARCHIVE_ZIP_CP437_MEMBER
+)
 TEST_FILE3_PATH = os.path.join(
     settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib', TEST_FILENAME3
 )
diff --git a/mayan/apps/common/tests/test_compressed_files.py b/mayan/apps/common/tests/test_compressed_files.py
index e004514c28..0d896205a0 100644
--- a/mayan/apps/common/tests/test_compressed_files.py
+++ b/mayan/apps/common/tests/test_compressed_files.py
@@ -5,6 +5,7 @@ from mayan.apps.common.tests import BaseTestCase
 
 from ..compressed_files import Archive, TarArchive, ZipArchive
 from .literals import (
+    TEST_ARCHIVE_ZIP_CP437_MEMBER_PATH,
     TEST_ARCHIVE_ZIP_SPECIAL_CHARACTERS_FILENAME_MEMBER_PATH,
     TEST_COMPRESSED_FILE_CONTENTS, TEST_FILE_CONTENTS_1, TEST_FILE3_PATH,
     TEST_FILENAME1, TEST_FILENAME3, TEST_TAR_BZ2_FILE_PATH,
@@ -64,6 +65,11 @@ class ZipArchiveClassTestCase(TarArchiveClassTestCase):
             archive = Archive.open(file_object=file_object)
             list(archive.get_members())
 
+    def test_open_cp437_member(self):
+        with open(TEST_ARCHIVE_ZIP_CP437_MEMBER_PATH, mode='rb') as file_object:
+            archive = Archive.open(file_object=file_object)
+            list(archive.get_members())
+
 
 class TarGzArchiveClassTestCase(TarArchiveClassTestCase):
     archive_path = TEST_TAR_GZ_FILE_PATH