Compare commits

...

1 Commits

Author SHA1 Message Date
Roberto Rosario
7889ff64d9 Document zip file encoding issues
Signed-off-by: Roberto Rosario <roberto.rosario@mayan-edms.com>
2019-10-01 00:20:50 -04:00
4 changed files with 38 additions and 3 deletions

View File

@@ -11,6 +11,7 @@ except ImportError:
COMPRESSION = zipfile.ZIP_STORED COMPRESSION = zipfile.ZIP_STORED
from django.core.files.uploadedfile import SimpleUploadedFile from django.core.files.uploadedfile import SimpleUploadedFile
from django.utils.encoding import force_text
from mayan.apps.mimetype.api import get_mimetype from mayan.apps.mimetype.api import get_mimetype
@@ -136,9 +137,32 @@ class ZipArchive(Archive):
return self._archive.read(filename) return self._archive.read(filename)
def members(self):
    """
    Return the names of the archive members that are not directories.

    Entries whose name ends with a slash are treated as directories and
    skipped. Every returned name is the exact object reported by
    ``self._archive.namelist()`` so it can be passed back to the archive
    unchanged.
    """
    results = []

    for filename in self._archive.namelist():
        # Zip files only support UTF-8 and CP437 encodings.
        # Possible future improvement that goes beyond the Zip format:
        # use chardet.detect to guess the most likely encoding when it
        # is something other than CP437.
        if isinstance(filename, bytes):
            # Byte string names (what Python 2 reports for entries that
            # are not flagged as UTF-8) are decoded only to perform the
            # trailing slash check. CP437 maps every byte value, so this
            # decode cannot fail.
            decoded_filename = filename.decode('CP437')
        else:
            # Already text (always the case on Python 3). Calling
            # .decode() here is what used to fail: text strings have no
            # such method on Python 3 and the resulting AttributeError
            # was not caught.
            decoded_filename = filename

        if not decoded_filename.endswith('/'):
            # Keep the name as reported instead of decoding and
            # re-encoding it.
            results.append(filename)

    return results
def open_member(self, filename): def open_member(self, filename):
return self._archive.open(filename) return self._archive.open(filename)

View File

@@ -12,6 +12,7 @@ TEST_VIEW_URL = 'test-view-url'
# Filenames # Filenames
TEST_ARCHIVE_ZIP_SPECIAL_CHARACTERS_FILENAME_MEMBER = 'test_archvive_with_special_characters_filename_member.zip' TEST_ARCHIVE_ZIP_SPECIAL_CHARACTERS_FILENAME_MEMBER = 'test_archvive_with_special_characters_filename_member.zip'
TEST_ARCHIVE_ZIP_CP437_MEMBER = 'test_archvive_with_cp437_member.zip'
TEST_FILENAME1 = 'test_file1.txt' TEST_FILENAME1 = 'test_file1.txt'
TEST_FILENAME2 = 'test_file2.txt' TEST_FILENAME2 = 'test_file2.txt'
TEST_FILENAME3 = 'test_file3.txt' TEST_FILENAME3 = 'test_file3.txt'
@@ -28,6 +29,10 @@ TEST_ARCHIVE_ZIP_SPECIAL_CHARACTERS_FILENAME_MEMBER_PATH = os.path.join(
settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib', settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib',
TEST_ARCHIVE_ZIP_SPECIAL_CHARACTERS_FILENAME_MEMBER TEST_ARCHIVE_ZIP_SPECIAL_CHARACTERS_FILENAME_MEMBER
) )
TEST_ARCHIVE_ZIP_CP437_MEMBER_PATH = os.path.join(
settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib',
TEST_ARCHIVE_ZIP_CP437_MEMBER
)
TEST_FILE3_PATH = os.path.join( TEST_FILE3_PATH = os.path.join(
settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib', TEST_FILENAME3 settings.BASE_DIR, 'apps', 'common', 'tests', 'contrib', TEST_FILENAME3
) )

View File

@@ -5,6 +5,7 @@ from mayan.apps.common.tests import BaseTestCase
from ..compressed_files import Archive, TarArchive, ZipArchive from ..compressed_files import Archive, TarArchive, ZipArchive
from .literals import ( from .literals import (
TEST_ARCHIVE_ZIP_CP437_MEMBER_PATH,
TEST_ARCHIVE_ZIP_SPECIAL_CHARACTERS_FILENAME_MEMBER_PATH, TEST_ARCHIVE_ZIP_SPECIAL_CHARACTERS_FILENAME_MEMBER_PATH,
TEST_COMPRESSED_FILE_CONTENTS, TEST_FILE_CONTENTS_1, TEST_FILE3_PATH, TEST_COMPRESSED_FILE_CONTENTS, TEST_FILE_CONTENTS_1, TEST_FILE3_PATH,
TEST_FILENAME1, TEST_FILENAME3, TEST_TAR_BZ2_FILE_PATH, TEST_FILENAME1, TEST_FILENAME3, TEST_TAR_BZ2_FILE_PATH,
@@ -64,6 +65,11 @@ class ZipArchiveClassTestCase(TarArchiveClassTestCase):
archive = Archive.open(file_object=file_object) archive = Archive.open(file_object=file_object)
list(archive.get_members()) list(archive.get_members())
def test_open_cp437_member(self):
    """
    Open the archive at TEST_ARCHIVE_ZIP_CP437_MEMBER_PATH and consume
    all of its members. The test passes as long as nothing raises.
    """
    with open(TEST_ARCHIVE_ZIP_CP437_MEMBER_PATH, mode='rb') as archive_file:
        zip_archive = Archive.open(file_object=archive_file)
        # Exhaust the members while the file is still open.
        for _member in zip_archive.get_members():
            pass
class TarGzArchiveClassTestCase(TarArchiveClassTestCase): class TarGzArchiveClassTestCase(TarArchiveClassTestCase):
archive_path = TEST_TAR_GZ_FILE_PATH archive_path = TEST_TAR_GZ_FILE_PATH