def members(self):
    """
    Return the names of the archive members that are not directories.

    Every name reported by ``self._archive.namelist()`` that ends with a
    slash is considered a directory entry and is skipped. All other
    names are returned unchanged and in archive order, so each one can
    be handed back to the archive to read or open that member.

    Names may be text or byte strings. Byte string names are never
    decoded, which means a badly encoded member name cannot raise an
    encoding error here.
    """
    results = []

    for filename in self._archive.namelist():
        # The slash is the single byte 0x2F in CP437, ASCII and UTF-8,
        # so a byte string name can be checked directly instead of
        # decoding it, testing it and encoding it back again.
        if isinstance(filename, bytes):
            separator = b'/'
        else:
            separator = '/'

        if not filename.endswith(separator):
            results.append(filename)

    return results
def test_open_cp437_member(self):
    """
    Open the CP437 member test archive and walk all of its members.

    There are no explicit assertions: the test passes when opening the
    archive and iterating over every member raises no exception.
    """
    with open(TEST_ARCHIVE_ZIP_CP437_MEMBER_PATH, mode='rb') as archive_file:
        zip_archive = Archive.open(file_object=archive_file)

        # Consume the whole iterable so that every member is visited.
        for _ in zip_archive.get_members():
            pass