Remove converter.to_pdf iterator

Remove the custom iterator that was used to return the result of a conversion to PDF. Instead, return a file object, which can then be copied around using shutil.copyfileobj. Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
2019-05-14 01:58:49 -04:00
parent 8b073c3151
commit 4e5c513529
3 changed files with 84 additions and 76 deletions
--- a/mayan/apps/converter/backends/python.py
+++ b/mayan/apps/converter/backends/python.py
@@ -70,16 +70,6 @@ except sh.CommandNotFound:
 logger = logging.getLogger(__name__)
 class IteratorIO(object):
    def __init__(self, iterator):
        self.file_buffer = io.BytesIO()
        for chunk in iterator:
            self.file_buffer.write(chunk)
        self.file_buffer.seek(0)
 class Python(ConverterBase):
    def convert(self, *args, **kwargs):
        super(Python, self).convert(*args, **kwargs)
@@ -142,7 +132,7 @@ class Python(ConverterBase):
        if self.mime_type == 'application/pdf' or self.soffice_file:
            if self.soffice_file:
-                file_object = IteratorIO(self.soffice_file).file_buffer
+                file_object = self.soffice_file
            else:
                file_object = self.file_object
--- a/mayan/apps/converter/classes.py
+++ b/mayan/apps/converter/classes.py
@@ -168,66 +168,81 @@ class ConverterBase(object):
                _('LibreOffice not installed or not found.')
            )
-        new_file_object = NamedTemporaryFile()
+        with NamedTemporaryFile() as temporary_file_object:
-        input_filepath = new_file_object.name
+            # Copy the source file object of the converter instance to a
-        self.file_object.seek(0)
+            # named temporary file to be able to pass it to the LibreOffice
-        shutil.copyfileobj(fsrc=self.file_object, fdst=new_file_object)
+            # execution.
-        self.file_object.seek(0)
+            self.file_object.seek(0)
-        new_file_object.seek(0)
+            shutil.copyfileobj(
-
+                fsrc=self.file_object, fdst=temporary_file_object
        libreoffice_filter = None
        if self.mime_type == 'text/plain':
            libreoffice_filter = 'Text (encoded):UTF8,LF,,,'
        libreoffice_home_directory = mkdtemp()
        args = (
            input_filepath, '--outdir', setting_temporary_directory.value,
            '-env:UserInstallation=file://{}'.format(
                os.path.join(
                    libreoffice_home_directory, 'LibreOffice_Conversion'
                )
            ),
        )
        kwargs = {'_env': {'HOME': libreoffice_home_directory}}
        if libreoffice_filter:
            kwargs.update({'infilter': libreoffice_filter})
        try:
            LIBREOFFICE(*args, **kwargs)
        except sh.ErrorReturnCode as exception:
            new_file_object.close()
            raise OfficeConversionError(exception)
        except Exception as exception:
            new_file_object.close()
            logger.error('Exception launching Libre Office; %s', exception)
            raise
        finally:
            fs_cleanup(libreoffice_home_directory)
        filename, extension = os.path.splitext(
            os.path.basename(input_filepath)
        )
        logger.debug('filename: %s', filename)
        logger.debug('extension: %s', extension)
        converted_output = os.path.join(
            setting_temporary_directory.value, os.path.extsep.join(
                (filename, 'pdf')
            )
-        )
+            self.file_object.seek(0)
-        logger.debug('converted_output: %s', converted_output)
+            temporary_file_object.seek(0)
-        with open(converted_output, mode='rb') as converted_file_object:
+            libreoffice_home_directory = mkdtemp()
-            while True:
+            args = (
-                data = converted_file_object.read(CHUNK_SIZE)
+                temporary_file_object.name, '--outdir', setting_temporary_directory.value,
-                if not data:
+                '-env:UserInstallation=file://{}'.format(
-                    break
+                    os.path.join(
-                yield data
+                        libreoffice_home_directory, 'LibreOffice_Conversion'
                    )
                ),
            )
-        new_file_object.close()
+            kwargs = {'_env': {'HOME': libreoffice_home_directory}}
-        fs_cleanup(converted_output)
+
            if self.mime_type == 'text/plain':
                kwargs.update(
                    {'infilter': 'Text (encoded):UTF8,LF,,,'}
                )
            try:
                LIBREOFFICE(*args, **kwargs)
            except sh.ErrorReturnCode as exception:
                temporary_file_object.close()
                raise OfficeConversionError(exception)
            except Exception as exception:
                temporary_file_object.close()
                logger.error('Exception launching Libre Office; %s', exception)
                raise
            finally:
                fs_cleanup(libreoffice_home_directory)
            # LibreOffice returns a PDF file with the same name as the input
            # provided but with the .pdf extension.
            # Build the converted output file path from the temporary file
            # name plus the temporary directory.
            filename, extension = os.path.splitext(
                os.path.basename(temporary_file_object.name)
            )
            logger.debug('filename: %s', filename)
            logger.debug('extension: %s', extension)
            converted_file_path = os.path.join(
                setting_temporary_directory.value, os.path.extsep.join(
                    (filename, 'pdf')
                )
            )
            logger.debug('converted_file_path: %s', converted_file_path)
        # Don't use context manager with the NamedTemporaryFile on purpose
        # so that it is deleted when the caller closes the file and not
        # before.
        temporary_converted_file_object = NamedTemporaryFile()
        # Copy the LibreOffice output file to a new named temporary file
        # and delete the converted file
        with open(converted_file_path, mode='rb') as converted_file_object:
            shutil.copyfileobj(
                fsrc=converted_file_object, fdst=temporary_converted_file_object
            )
        fs_cleanup(converted_file_path)
        temporary_converted_file_object.seek(0)
        return temporary_converted_file_object
    def to_pdf(self):
        if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES:
--- a/mayan/apps/documents/models/document_version_models.py
+++ b/mayan/apps/documents/models/document_version_models.py
@@ -176,16 +176,19 @@ class DocumentVersion(models.Model):
            try:
                converter = get_converter_class()(file_object=self.open())
-                pdf_file_object = converter.to_pdf()
+                with converter.to_pdf() as pdf_file_object:
-                # Since open "wb+" doesn't create files, check if the file
+                    # Since open "wb+" doesn't create files, check if the file
-                # exists, if not then create it
+                    # exists, if not then create it
-                if not storage_documentimagecache.exists(cache_filename):
+                    if not storage_documentimagecache.exists(cache_filename):
-                    storage_documentimagecache.save(name=cache_filename, content=ContentFile(content=''))
+                        storage_documentimagecache.save(
                            name=cache_filename, content=ContentFile(content='')
                        )
-                with storage_documentimagecache.open(cache_filename, mode='wb+') as file_object:
+                    with storage_documentimagecache.open(cache_filename, mode='wb+') as file_object:
-                    for chunk in pdf_file_object:
+                        shutil.copyfileobj(
-                        file_object.write(chunk)
+                            fsrc=pdf_file_object, fdst=file_object
                        )
                return storage_documentimagecache.open(cache_filename)
            except InvalidOfficeFormat: