Remove converter.to_pdf iterator

Remove the custom iterator that was used to return the result of a
conversion to PDF. Instead, return a file object which can then be
copied around using shutil.copyfileobj.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2019-05-14 01:58:49 -04:00
parent 8b073c3151
commit 4e5c513529
3 changed files with 84 additions and 76 deletions

View File

@@ -70,16 +70,6 @@ except sh.CommandNotFound:
logger = logging.getLogger(__name__)
class IteratorIO(object):
def __init__(self, iterator):
self.file_buffer = io.BytesIO()
for chunk in iterator:
self.file_buffer.write(chunk)
self.file_buffer.seek(0)
class Python(ConverterBase):
def convert(self, *args, **kwargs):
super(Python, self).convert(*args, **kwargs)
@@ -142,7 +132,7 @@ class Python(ConverterBase):
if self.mime_type == 'application/pdf' or self.soffice_file:
if self.soffice_file:
file_object = IteratorIO(self.soffice_file).file_buffer
file_object = self.soffice_file
else:
file_object = self.file_object

View File

@@ -168,20 +168,20 @@ class ConverterBase(object):
_('LibreOffice not installed or not found.')
)
new_file_object = NamedTemporaryFile()
input_filepath = new_file_object.name
with NamedTemporaryFile() as temporary_file_object:
# Copy the source file object of the converter instance to a
# named temporary file to be able to pass it to the LibreOffice
# execution.
self.file_object.seek(0)
shutil.copyfileobj(fsrc=self.file_object, fdst=new_file_object)
shutil.copyfileobj(
fsrc=self.file_object, fdst=temporary_file_object
)
self.file_object.seek(0)
new_file_object.seek(0)
libreoffice_filter = None
if self.mime_type == 'text/plain':
libreoffice_filter = 'Text (encoded):UTF8,LF,,,'
temporary_file_object.seek(0)
libreoffice_home_directory = mkdtemp()
args = (
input_filepath, '--outdir', setting_temporary_directory.value,
temporary_file_object.name, '--outdir', setting_temporary_directory.value,
'-env:UserInstallation=file://{}'.format(
os.path.join(
libreoffice_home_directory, 'LibreOffice_Conversion'
@@ -191,43 +191,58 @@ class ConverterBase(object):
kwargs = {'_env': {'HOME': libreoffice_home_directory}}
if libreoffice_filter:
kwargs.update({'infilter': libreoffice_filter})
if self.mime_type == 'text/plain':
kwargs.update(
{'infilter': 'Text (encoded):UTF8,LF,,,'}
)
try:
LIBREOFFICE(*args, **kwargs)
except sh.ErrorReturnCode as exception:
new_file_object.close()
temporary_file_object.close()
raise OfficeConversionError(exception)
except Exception as exception:
new_file_object.close()
temporary_file_object.close()
logger.error('Exception launching Libre Office; %s', exception)
raise
finally:
fs_cleanup(libreoffice_home_directory)
# LibreOffice returns a PDF file with the same name as the input
# provided but with the .pdf extension.
# Get the converted output file path out of the temporary file
# name plus the temporary directory
filename, extension = os.path.splitext(
os.path.basename(input_filepath)
os.path.basename(temporary_file_object.name)
)
logger.debug('filename: %s', filename)
logger.debug('extension: %s', extension)
converted_output = os.path.join(
converted_file_path = os.path.join(
setting_temporary_directory.value, os.path.extsep.join(
(filename, 'pdf')
)
)
logger.debug('converted_output: %s', converted_output)
logger.debug('converted_file_path: %s', converted_file_path)
with open(converted_output, mode='rb') as converted_file_object:
while True:
data = converted_file_object.read(CHUNK_SIZE)
if not data:
break
yield data
# Don't use context manager with the NamedTemporaryFile on purpose
# so that it is deleted when the caller closes the file and not
# before.
new_file_object.close()
fs_cleanup(converted_output)
temporary_converted_file_object = NamedTemporaryFile()
# Copy the LibreOffice output file to a new named temporary file
# and delete the converted file
with open(converted_file_path, mode='rb') as converted_file_object:
shutil.copyfileobj(
fsrc=converted_file_object, fdst=temporary_converted_file_object
)
fs_cleanup(converted_file_path)
temporary_converted_file_object.seek(0)
return temporary_converted_file_object
def to_pdf(self):
if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES:

View File

@@ -176,16 +176,19 @@ class DocumentVersion(models.Model):
try:
converter = get_converter_class()(file_object=self.open())
pdf_file_object = converter.to_pdf()
with converter.to_pdf() as pdf_file_object:
# Since open "wb+" doesn't create files, check if the file
# exists and, if it does not, create it
if not storage_documentimagecache.exists(cache_filename):
storage_documentimagecache.save(name=cache_filename, content=ContentFile(content=''))
storage_documentimagecache.save(
name=cache_filename, content=ContentFile(content='')
)
with storage_documentimagecache.open(cache_filename, mode='wb+') as file_object:
for chunk in pdf_file_object:
file_object.write(chunk)
shutil.copyfileobj(
fsrc=pdf_file_object, fdst=file_object
)
return storage_documentimagecache.open(cache_filename)
except InvalidOfficeFormat: