Remove converter.to_pdf iterator
Remove the custom iterator used to return the result of a conversion to PDF. Instead, return a file object, which can then be copied using shutil.copyfileobj. Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -70,16 +70,6 @@ except sh.CommandNotFound:
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class IteratorIO(object):
|
|
||||||
def __init__(self, iterator):
|
|
||||||
self.file_buffer = io.BytesIO()
|
|
||||||
|
|
||||||
for chunk in iterator:
|
|
||||||
self.file_buffer.write(chunk)
|
|
||||||
|
|
||||||
self.file_buffer.seek(0)
|
|
||||||
|
|
||||||
|
|
||||||
class Python(ConverterBase):
|
class Python(ConverterBase):
|
||||||
def convert(self, *args, **kwargs):
|
def convert(self, *args, **kwargs):
|
||||||
super(Python, self).convert(*args, **kwargs)
|
super(Python, self).convert(*args, **kwargs)
|
||||||
@@ -142,7 +132,7 @@ class Python(ConverterBase):
|
|||||||
|
|
||||||
if self.mime_type == 'application/pdf' or self.soffice_file:
|
if self.mime_type == 'application/pdf' or self.soffice_file:
|
||||||
if self.soffice_file:
|
if self.soffice_file:
|
||||||
file_object = IteratorIO(self.soffice_file).file_buffer
|
file_object = self.soffice_file
|
||||||
else:
|
else:
|
||||||
file_object = self.file_object
|
file_object = self.file_object
|
||||||
|
|
||||||
|
|||||||
@@ -168,66 +168,81 @@ class ConverterBase(object):
|
|||||||
_('LibreOffice not installed or not found.')
|
_('LibreOffice not installed or not found.')
|
||||||
)
|
)
|
||||||
|
|
||||||
new_file_object = NamedTemporaryFile()
|
with NamedTemporaryFile() as temporary_file_object:
|
||||||
input_filepath = new_file_object.name
|
# Copy the source file object of the converter instance to a
|
||||||
self.file_object.seek(0)
|
# named temporary file to be able to pass it to the LibreOffice
|
||||||
shutil.copyfileobj(fsrc=self.file_object, fdst=new_file_object)
|
# execution.
|
||||||
self.file_object.seek(0)
|
self.file_object.seek(0)
|
||||||
new_file_object.seek(0)
|
shutil.copyfileobj(
|
||||||
|
fsrc=self.file_object, fdst=temporary_file_object
|
||||||
libreoffice_filter = None
|
|
||||||
if self.mime_type == 'text/plain':
|
|
||||||
libreoffice_filter = 'Text (encoded):UTF8,LF,,,'
|
|
||||||
|
|
||||||
libreoffice_home_directory = mkdtemp()
|
|
||||||
args = (
|
|
||||||
input_filepath, '--outdir', setting_temporary_directory.value,
|
|
||||||
'-env:UserInstallation=file://{}'.format(
|
|
||||||
os.path.join(
|
|
||||||
libreoffice_home_directory, 'LibreOffice_Conversion'
|
|
||||||
)
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
kwargs = {'_env': {'HOME': libreoffice_home_directory}}
|
|
||||||
|
|
||||||
if libreoffice_filter:
|
|
||||||
kwargs.update({'infilter': libreoffice_filter})
|
|
||||||
|
|
||||||
try:
|
|
||||||
LIBREOFFICE(*args, **kwargs)
|
|
||||||
except sh.ErrorReturnCode as exception:
|
|
||||||
new_file_object.close()
|
|
||||||
raise OfficeConversionError(exception)
|
|
||||||
except Exception as exception:
|
|
||||||
new_file_object.close()
|
|
||||||
logger.error('Exception launching Libre Office; %s', exception)
|
|
||||||
raise
|
|
||||||
finally:
|
|
||||||
fs_cleanup(libreoffice_home_directory)
|
|
||||||
|
|
||||||
filename, extension = os.path.splitext(
|
|
||||||
os.path.basename(input_filepath)
|
|
||||||
)
|
|
||||||
logger.debug('filename: %s', filename)
|
|
||||||
logger.debug('extension: %s', extension)
|
|
||||||
|
|
||||||
converted_output = os.path.join(
|
|
||||||
setting_temporary_directory.value, os.path.extsep.join(
|
|
||||||
(filename, 'pdf')
|
|
||||||
)
|
)
|
||||||
)
|
self.file_object.seek(0)
|
||||||
logger.debug('converted_output: %s', converted_output)
|
temporary_file_object.seek(0)
|
||||||
|
|
||||||
with open(converted_output, mode='rb') as converted_file_object:
|
libreoffice_home_directory = mkdtemp()
|
||||||
while True:
|
args = (
|
||||||
data = converted_file_object.read(CHUNK_SIZE)
|
temporary_file_object.name, '--outdir', setting_temporary_directory.value,
|
||||||
if not data:
|
'-env:UserInstallation=file://{}'.format(
|
||||||
break
|
os.path.join(
|
||||||
yield data
|
libreoffice_home_directory, 'LibreOffice_Conversion'
|
||||||
|
)
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
new_file_object.close()
|
kwargs = {'_env': {'HOME': libreoffice_home_directory}}
|
||||||
fs_cleanup(converted_output)
|
|
||||||
|
if self.mime_type == 'text/plain':
|
||||||
|
kwargs.update(
|
||||||
|
{'infilter': 'Text (encoded):UTF8,LF,,,'}
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
LIBREOFFICE(*args, **kwargs)
|
||||||
|
except sh.ErrorReturnCode as exception:
|
||||||
|
temporary_file_object.close()
|
||||||
|
raise OfficeConversionError(exception)
|
||||||
|
except Exception as exception:
|
||||||
|
temporary_file_object.close()
|
||||||
|
logger.error('Exception launching Libre Office; %s', exception)
|
||||||
|
raise
|
||||||
|
finally:
|
||||||
|
fs_cleanup(libreoffice_home_directory)
|
||||||
|
|
||||||
|
# LibreOffice returns a PDF file with the same name as the input
|
||||||
|
# provided but with the .pdf extension.
|
||||||
|
|
||||||
|
# Get the converted output file path out of the temporary file
|
||||||
|
# name plus the temporary directory
|
||||||
|
|
||||||
|
filename, extension = os.path.splitext(
|
||||||
|
os.path.basename(temporary_file_object.name)
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.debug('filename: %s', filename)
|
||||||
|
logger.debug('extension: %s', extension)
|
||||||
|
|
||||||
|
converted_file_path = os.path.join(
|
||||||
|
setting_temporary_directory.value, os.path.extsep.join(
|
||||||
|
(filename, 'pdf')
|
||||||
|
)
|
||||||
|
)
|
||||||
|
logger.debug('converted_file_path: %s', converted_file_path)
|
||||||
|
|
||||||
|
# Don't use a context manager with the NamedTemporaryFile on purpose
|
||||||
|
# so that it is deleted when the caller closes the file and not
|
||||||
|
# before.
|
||||||
|
|
||||||
|
temporary_converted_file_object = NamedTemporaryFile()
|
||||||
|
|
||||||
|
# Copy the LibreOffice output file to a new named temporary file
|
||||||
|
# and delete the converted file
|
||||||
|
with open(converted_file_path, mode='rb') as converted_file_object:
|
||||||
|
shutil.copyfileobj(
|
||||||
|
fsrc=converted_file_object, fdst=temporary_converted_file_object
|
||||||
|
)
|
||||||
|
fs_cleanup(converted_file_path)
|
||||||
|
temporary_converted_file_object.seek(0)
|
||||||
|
return temporary_converted_file_object
|
||||||
|
|
||||||
def to_pdf(self):
|
def to_pdf(self):
|
||||||
if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES:
|
if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES:
|
||||||
|
|||||||
@@ -176,16 +176,19 @@ class DocumentVersion(models.Model):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
converter = get_converter_class()(file_object=self.open())
|
converter = get_converter_class()(file_object=self.open())
|
||||||
pdf_file_object = converter.to_pdf()
|
with converter.to_pdf() as pdf_file_object:
|
||||||
|
|
||||||
# Since open "wb+" doesn't create files, check if the file
|
# Since open "wb+" doesn't create files, check if the file
|
||||||
# exists, if not then create it
|
# exists, if not then create it
|
||||||
if not storage_documentimagecache.exists(cache_filename):
|
if not storage_documentimagecache.exists(cache_filename):
|
||||||
storage_documentimagecache.save(name=cache_filename, content=ContentFile(content=''))
|
storage_documentimagecache.save(
|
||||||
|
name=cache_filename, content=ContentFile(content='')
|
||||||
|
)
|
||||||
|
|
||||||
with storage_documentimagecache.open(cache_filename, mode='wb+') as file_object:
|
with storage_documentimagecache.open(cache_filename, mode='wb+') as file_object:
|
||||||
for chunk in pdf_file_object:
|
shutil.copyfileobj(
|
||||||
file_object.write(chunk)
|
fsrc=pdf_file_object, fdst=file_object
|
||||||
|
)
|
||||||
|
|
||||||
return storage_documentimagecache.open(cache_filename)
|
return storage_documentimagecache.open(cache_filename)
|
||||||
except InvalidOfficeFormat:
|
except InvalidOfficeFormat:
|
||||||
|
|||||||
Reference in New Issue
Block a user