diff --git a/mayan/apps/converter/backends/python.py b/mayan/apps/converter/backends/python.py index aa13aea99c..672997f879 100644 --- a/mayan/apps/converter/backends/python.py +++ b/mayan/apps/converter/backends/python.py @@ -70,16 +70,6 @@ except sh.CommandNotFound: logger = logging.getLogger(__name__) -class IteratorIO(object): - def __init__(self, iterator): - self.file_buffer = io.BytesIO() - - for chunk in iterator: - self.file_buffer.write(chunk) - - self.file_buffer.seek(0) - - class Python(ConverterBase): def convert(self, *args, **kwargs): super(Python, self).convert(*args, **kwargs) @@ -142,7 +132,7 @@ class Python(ConverterBase): if self.mime_type == 'application/pdf' or self.soffice_file: if self.soffice_file: - file_object = IteratorIO(self.soffice_file).file_buffer + file_object = self.soffice_file else: file_object = self.file_object diff --git a/mayan/apps/converter/classes.py b/mayan/apps/converter/classes.py index d62e9af77c..1cd99f49cf 100644 --- a/mayan/apps/converter/classes.py +++ b/mayan/apps/converter/classes.py @@ -168,66 +168,81 @@ class ConverterBase(object): _('LibreOffice not installed or not found.') ) - new_file_object = NamedTemporaryFile() - input_filepath = new_file_object.name - self.file_object.seek(0) - shutil.copyfileobj(fsrc=self.file_object, fdst=new_file_object) - self.file_object.seek(0) - new_file_object.seek(0) - - libreoffice_filter = None - if self.mime_type == 'text/plain': - libreoffice_filter = 'Text (encoded):UTF8,LF,,,' - - libreoffice_home_directory = mkdtemp() - args = ( - input_filepath, '--outdir', setting_temporary_directory.value, - '-env:UserInstallation=file://{}'.format( - os.path.join( - libreoffice_home_directory, 'LibreOffice_Conversion' - ) - ), - ) - - kwargs = {'_env': {'HOME': libreoffice_home_directory}} - - if libreoffice_filter: - kwargs.update({'infilter': libreoffice_filter}) - - try: - LIBREOFFICE(*args, **kwargs) - except sh.ErrorReturnCode as exception: - new_file_object.close() - raise 
OfficeConversionError(exception) - except Exception as exception: - new_file_object.close() - logger.error('Exception launching Libre Office; %s', exception) - raise - finally: - fs_cleanup(libreoffice_home_directory) - - filename, extension = os.path.splitext( - os.path.basename(input_filepath) - ) - logger.debug('filename: %s', filename) - logger.debug('extension: %s', extension) - - converted_output = os.path.join( - setting_temporary_directory.value, os.path.extsep.join( - (filename, 'pdf') + with NamedTemporaryFile() as temporary_file_object: + # Copy the source file object of the converter instance to a + # named temporary file to be able to pass it to the LibreOffice + # execution. + self.file_object.seek(0) + shutil.copyfileobj( + fsrc=self.file_object, fdst=temporary_file_object ) - ) - logger.debug('converted_output: %s', converted_output) + self.file_object.seek(0) + temporary_file_object.seek(0) - with open(converted_output, mode='rb') as converted_file_object: - while True: - data = converted_file_object.read(CHUNK_SIZE) - if not data: - break - yield data + libreoffice_home_directory = mkdtemp() + args = ( + temporary_file_object.name, '--outdir', setting_temporary_directory.value, + '-env:UserInstallation=file://{}'.format( + os.path.join( + libreoffice_home_directory, 'LibreOffice_Conversion' + ) + ), + ) - new_file_object.close() - fs_cleanup(converted_output) + kwargs = {'_env': {'HOME': libreoffice_home_directory}} + + if self.mime_type == 'text/plain': + kwargs.update( + {'infilter': 'Text (encoded):UTF8,LF,,,'} + ) + + try: + LIBREOFFICE(*args, **kwargs) + except sh.ErrorReturnCode as exception: + temporary_file_object.close() + raise OfficeConversionError(exception) + except Exception as exception: + temporary_file_object.close() + logger.error('Exception launching Libre Office; %s', exception) + raise + finally: + fs_cleanup(libreoffice_home_directory) + + # LibreOffice returns a PDF file with the same name as the input + # provided but with
the .pdf extension. + + # Get the converted output file path out of the temporary file + # name plus the temporary directory + + filename, extension = os.path.splitext( + os.path.basename(temporary_file_object.name) + ) + + logger.debug('filename: %s', filename) + logger.debug('extension: %s', extension) + + converted_file_path = os.path.join( + setting_temporary_directory.value, os.path.extsep.join( + (filename, 'pdf') + ) + ) + logger.debug('converted_file_path: %s', converted_file_path) + + # Don't use context manager with the NamedTemporaryFile on purpose + # so that it is deleted when the caller closes the file and not + # before. + + temporary_converted_file_object = NamedTemporaryFile() + + # Copy the LibreOffice output file to a new named temporary file + # and delete the converted file + with open(converted_file_path, mode='rb') as converted_file_object: + shutil.copyfileobj( + fsrc=converted_file_object, fdst=temporary_converted_file_object + ) + fs_cleanup(converted_file_path) + temporary_converted_file_object.seek(0) + return temporary_converted_file_object def to_pdf(self): if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES: diff --git a/mayan/apps/documents/models/document_version_models.py b/mayan/apps/documents/models/document_version_models.py index 5afda517f2..539eb3cbea 100644 --- a/mayan/apps/documents/models/document_version_models.py +++ b/mayan/apps/documents/models/document_version_models.py @@ -176,16 +176,19 @@ class DocumentVersion(models.Model): try: converter = get_converter_class()(file_object=self.open()) - pdf_file_object = converter.to_pdf() + with converter.to_pdf() as pdf_file_object: - # Since open "wb+" doesn't create files, check if the file - # exists, if not then create it - if not storage_documentimagecache.exists(cache_filename): - storage_documentimagecache.save(name=cache_filename, content=ContentFile(content='')) + # Since open "wb+" doesn't create files, check if the file + # exists, if not then create it + if not 
storage_documentimagecache.exists(cache_filename): + storage_documentimagecache.save( + name=cache_filename, content=ContentFile(content='') + ) - with storage_documentimagecache.open(cache_filename, mode='wb+') as file_object: - for chunk in pdf_file_object: - file_object.write(chunk) + with storage_documentimagecache.open(cache_filename, mode='wb+') as file_object: + shutil.copyfileobj( + fsrc=pdf_file_object, fdst=file_object + ) return storage_documentimagecache.open(cache_filename) except InvalidOfficeFormat: