Remove converter.to_pdf iterator

Remove the custom iterator used to return the result of a conversion to PDF. Instead, return a file object which can then be copied around using shutil.copyfileobj.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
2019-05-14 01:58:49 -04:00
parent 8b073c3151
commit 4e5c513529
3 changed files with 84 additions and 76 deletions
--- a/mayan/apps/converter/backends/python.py
+++ b/mayan/apps/converter/backends/python.py
@@ -70,16 +70,6 @@ except sh.CommandNotFound:
 logger = logging.getLogger(__name__)


-class IteratorIO(object):
-    def __init__(self, iterator):
-        self.file_buffer = io.BytesIO()
-
-        for chunk in iterator:
-            self.file_buffer.write(chunk)
-
-        self.file_buffer.seek(0)
-
-
 class Python(ConverterBase):
    def convert(self, *args, **kwargs):
        super(Python, self).convert(*args, **kwargs)
@@ -142,7 +132,7 @@ class Python(ConverterBase):

        if self.mime_type == 'application/pdf' or self.soffice_file:
            if self.soffice_file:
-                file_object = IteratorIO(self.soffice_file).file_buffer
+                file_object = self.soffice_file
            else:
                file_object = self.file_object

--- a/mayan/apps/converter/classes.py
+++ b/mayan/apps/converter/classes.py
@@ -168,20 +168,20 @@ class ConverterBase(object):
                _('LibreOffice not installed or not found.')
            )

-        new_file_object = NamedTemporaryFile()
-        input_filepath = new_file_object.name
+        with NamedTemporaryFile() as temporary_file_object:
+            # Copy the source file object of the converter instance to a
+            # named temporary file to be able to pass it to the LibreOffice
+            # execution.
            self.file_object.seek(0)
-        shutil.copyfileobj(fsrc=self.file_object, fdst=new_file_object)
+            shutil.copyfileobj(
+                fsrc=self.file_object, fdst=temporary_file_object
+            )
            self.file_object.seek(0)
-        new_file_object.seek(0)
-
-        libreoffice_filter = None
-        if self.mime_type == 'text/plain':
-            libreoffice_filter = 'Text (encoded):UTF8,LF,,,'
+            temporary_file_object.seek(0)

            libreoffice_home_directory = mkdtemp()
            args = (
-            input_filepath, '--outdir', setting_temporary_directory.value,
+                temporary_file_object.name, '--outdir', setting_temporary_directory.value,
                '-env:UserInstallation=file://{}'.format(
                    os.path.join(
                        libreoffice_home_directory, 'LibreOffice_Conversion'
@@ -191,43 +191,58 @@ class ConverterBase(object):

            kwargs = {'_env': {'HOME': libreoffice_home_directory}}

-        if libreoffice_filter:
-            kwargs.update({'infilter': libreoffice_filter})
+            if self.mime_type == 'text/plain':
+                kwargs.update(
+                    {'infilter': 'Text (encoded):UTF8,LF,,,'}
+                )

            try:
                LIBREOFFICE(*args, **kwargs)
            except sh.ErrorReturnCode as exception:
-            new_file_object.close()
+                temporary_file_object.close()
                raise OfficeConversionError(exception)
            except Exception as exception:
-            new_file_object.close()
+                temporary_file_object.close()
                logger.error('Exception launching Libre Office; %s', exception)
                raise
            finally:
                fs_cleanup(libreoffice_home_directory)

+            # LibreOffice returns a PDF file with the same name as the input
+            # provided but with the .pdf extension.
+
+            # Build the converted output file path from the temporary
+            # directory and the temporary file name.
+
            filename, extension = os.path.splitext(
-            os.path.basename(input_filepath)
+                os.path.basename(temporary_file_object.name)
            )
+
            logger.debug('filename: %s', filename)
            logger.debug('extension: %s', extension)

-        converted_output = os.path.join(
+            converted_file_path = os.path.join(
                setting_temporary_directory.value, os.path.extsep.join(
                    (filename, 'pdf')
                )
            )
-        logger.debug('converted_output: %s', converted_output)
+            logger.debug('converted_file_path: %s', converted_file_path)

-        with open(converted_output, mode='rb') as converted_file_object:
-            while True:
-                data = converted_file_object.read(CHUNK_SIZE)
-                if not data:
-                    break
-                yield data
+        # Deliberately avoid a context manager for this NamedTemporaryFile
+        # so that it is deleted when the caller closes the file and not
+        # before.

-        new_file_object.close()
-        fs_cleanup(converted_output)
+        temporary_converted_file_object = NamedTemporaryFile()
+
+        # Copy the LibreOffice output file to a new named temporary file
+        # and delete the converted file
+        with open(converted_file_path, mode='rb') as converted_file_object:
+            shutil.copyfileobj(
+                fsrc=converted_file_object, fdst=temporary_converted_file_object
+            )
+        fs_cleanup(converted_file_path)
+        temporary_converted_file_object.seek(0)
+        return temporary_converted_file_object

    def to_pdf(self):
        if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES:
--- a/mayan/apps/documents/models/document_version_models.py
+++ b/mayan/apps/documents/models/document_version_models.py
@@ -176,16 +176,19 @@ class DocumentVersion(models.Model):

            try:
                converter = get_converter_class()(file_object=self.open())
-                pdf_file_object = converter.to_pdf()
+                with converter.to_pdf() as pdf_file_object:

                    # Since open "wb+" doesn't create files, check if the file
                    # exists, if not then create it
                    if not storage_documentimagecache.exists(cache_filename):
-                    storage_documentimagecache.save(name=cache_filename, content=ContentFile(content=''))
+                        storage_documentimagecache.save(
+                            name=cache_filename, content=ContentFile(content='')
+                        )

                    with storage_documentimagecache.open(cache_filename, mode='wb+') as file_object:
-                    for chunk in pdf_file_object:
-                        file_object.write(chunk)
+                        shutil.copyfileobj(
+                            fsrc=pdf_file_object, fdst=file_object
+                        )

                return storage_documentimagecache.open(cache_filename)
            except InvalidOfficeFormat: