Remove converter.to_pdf iterator
Remove the custom iterator used to return the result of a conversion to PDF. Instead, return a file object which can then be copied around using shutil.copyfileobj. Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -70,16 +70,6 @@ except sh.CommandNotFound:
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IteratorIO(object):
|
||||
def __init__(self, iterator):
|
||||
self.file_buffer = io.BytesIO()
|
||||
|
||||
for chunk in iterator:
|
||||
self.file_buffer.write(chunk)
|
||||
|
||||
self.file_buffer.seek(0)
|
||||
|
||||
|
||||
class Python(ConverterBase):
|
||||
def convert(self, *args, **kwargs):
|
||||
super(Python, self).convert(*args, **kwargs)
|
||||
@@ -142,7 +132,7 @@ class Python(ConverterBase):
|
||||
|
||||
if self.mime_type == 'application/pdf' or self.soffice_file:
|
||||
if self.soffice_file:
|
||||
file_object = IteratorIO(self.soffice_file).file_buffer
|
||||
file_object = self.soffice_file
|
||||
else:
|
||||
file_object = self.file_object
|
||||
|
||||
|
||||
@@ -168,20 +168,20 @@ class ConverterBase(object):
|
||||
_('LibreOffice not installed or not found.')
|
||||
)
|
||||
|
||||
new_file_object = NamedTemporaryFile()
|
||||
input_filepath = new_file_object.name
|
||||
with NamedTemporaryFile() as temporary_file_object:
|
||||
# Copy the source file object of the converter instance to a
|
||||
# named temporary file to be able to pass it to the LibreOffice
|
||||
# execution.
|
||||
self.file_object.seek(0)
|
||||
shutil.copyfileobj(fsrc=self.file_object, fdst=new_file_object)
|
||||
shutil.copyfileobj(
|
||||
fsrc=self.file_object, fdst=temporary_file_object
|
||||
)
|
||||
self.file_object.seek(0)
|
||||
new_file_object.seek(0)
|
||||
|
||||
libreoffice_filter = None
|
||||
if self.mime_type == 'text/plain':
|
||||
libreoffice_filter = 'Text (encoded):UTF8,LF,,,'
|
||||
temporary_file_object.seek(0)
|
||||
|
||||
libreoffice_home_directory = mkdtemp()
|
||||
args = (
|
||||
input_filepath, '--outdir', setting_temporary_directory.value,
|
||||
temporary_file_object.name, '--outdir', setting_temporary_directory.value,
|
||||
'-env:UserInstallation=file://{}'.format(
|
||||
os.path.join(
|
||||
libreoffice_home_directory, 'LibreOffice_Conversion'
|
||||
@@ -191,43 +191,58 @@ class ConverterBase(object):
|
||||
|
||||
kwargs = {'_env': {'HOME': libreoffice_home_directory}}
|
||||
|
||||
if libreoffice_filter:
|
||||
kwargs.update({'infilter': libreoffice_filter})
|
||||
if self.mime_type == 'text/plain':
|
||||
kwargs.update(
|
||||
{'infilter': 'Text (encoded):UTF8,LF,,,'}
|
||||
)
|
||||
|
||||
try:
|
||||
LIBREOFFICE(*args, **kwargs)
|
||||
except sh.ErrorReturnCode as exception:
|
||||
new_file_object.close()
|
||||
temporary_file_object.close()
|
||||
raise OfficeConversionError(exception)
|
||||
except Exception as exception:
|
||||
new_file_object.close()
|
||||
temporary_file_object.close()
|
||||
logger.error('Exception launching Libre Office; %s', exception)
|
||||
raise
|
||||
finally:
|
||||
fs_cleanup(libreoffice_home_directory)
|
||||
|
||||
# LibreOffice returns a PDF file with the same name as the input
|
||||
# provided but with the .pdf extension.
|
||||
|
||||
# Get the converted output file path out of the temporary file
|
||||
# name plus the temporary directory
|
||||
|
||||
filename, extension = os.path.splitext(
|
||||
os.path.basename(input_filepath)
|
||||
os.path.basename(temporary_file_object.name)
|
||||
)
|
||||
|
||||
logger.debug('filename: %s', filename)
|
||||
logger.debug('extension: %s', extension)
|
||||
|
||||
converted_output = os.path.join(
|
||||
converted_file_path = os.path.join(
|
||||
setting_temporary_directory.value, os.path.extsep.join(
|
||||
(filename, 'pdf')
|
||||
)
|
||||
)
|
||||
logger.debug('converted_output: %s', converted_output)
|
||||
logger.debug('converted_file_path: %s', converted_file_path)
|
||||
|
||||
with open(converted_output, mode='rb') as converted_file_object:
|
||||
while True:
|
||||
data = converted_file_object.read(CHUNK_SIZE)
|
||||
if not data:
|
||||
break
|
||||
yield data
|
||||
# Don't use context manager with the NamedTemporaryFile on purpose
|
||||
# so that it is deleted when the caller closes the file and not
|
||||
# before.
|
||||
|
||||
new_file_object.close()
|
||||
fs_cleanup(converted_output)
|
||||
temporary_converted_file_object = NamedTemporaryFile()
|
||||
|
||||
# Copy the LibreOffice output file to a new named temporary file
|
||||
# and delete the converted file
|
||||
with open(converted_file_path, mode='rb') as converted_file_object:
|
||||
shutil.copyfileobj(
|
||||
fsrc=converted_file_object, fdst=temporary_converted_file_object
|
||||
)
|
||||
fs_cleanup(converted_file_path)
|
||||
temporary_converted_file_object.seek(0)
|
||||
return temporary_converted_file_object
|
||||
|
||||
def to_pdf(self):
|
||||
if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES:
|
||||
|
||||
@@ -176,16 +176,19 @@ class DocumentVersion(models.Model):
|
||||
|
||||
try:
|
||||
converter = get_converter_class()(file_object=self.open())
|
||||
pdf_file_object = converter.to_pdf()
|
||||
with converter.to_pdf() as pdf_file_object:
|
||||
|
||||
# Since open "wb+" doesn't create files, check if the file
|
||||
# exists, if not then create it
|
||||
if not storage_documentimagecache.exists(cache_filename):
|
||||
storage_documentimagecache.save(name=cache_filename, content=ContentFile(content=''))
|
||||
storage_documentimagecache.save(
|
||||
name=cache_filename, content=ContentFile(content='')
|
||||
)
|
||||
|
||||
with storage_documentimagecache.open(cache_filename, mode='wb+') as file_object:
|
||||
for chunk in pdf_file_object:
|
||||
file_object.write(chunk)
|
||||
shutil.copyfileobj(
|
||||
fsrc=pdf_file_object, fdst=file_object
|
||||
)
|
||||
|
||||
return storage_documentimagecache.open(cache_filename)
|
||||
except InvalidOfficeFormat:
|
||||
|
||||
Reference in New Issue
Block a user