Files
mayan-edms/mayan/apps/converter/classes.py
Roberto Rosario d338da5491 Fix merge typo
Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
2019-05-14 02:36:29 -04:00

215 lines
7.0 KiB
Python

from __future__ import unicode_literals
from io import BytesIO
import logging
import os
import shutil
from PIL import Image
import sh
import yaml
try:
from yaml import CSafeLoader as SafeLoader
except ImportError:
from yaml import SafeLoader
from django.utils.translation import ugettext_lazy as _
from mayan.apps.mimetype.api import get_mimetype
from mayan.apps.storage.settings import setting_temporary_directory
from mayan.apps.storage.utils import (
NamedTemporaryFile, fs_cleanup, mkdtemp
)
from .exceptions import InvalidOfficeFormat, OfficeConversionError
from .literals import (
CONVERTER_OFFICE_FILE_MIMETYPES, DEFAULT_LIBREOFFICE_PATH,
DEFAULT_PAGE_NUMBER, DEFAULT_PILLOW_FORMAT
)
from .settings import setting_graphics_backend_config
logger = logging.getLogger(__name__)
BACKEND_CONFIG = yaml.load(
stream=setting_graphics_backend_config.value, Loader=SafeLoader
)
libreoffice_path = BACKEND_CONFIG.get(
'libreoffice_path', DEFAULT_LIBREOFFICE_PATH
)
class ConverterBase(object):
def __init__(self, file_object, mime_type=None):
self.file_object = file_object
self.image = None
self.mime_type = mime_type or get_mimetype(
file_object=file_object, mimetype_only=False
)[0]
self.soffice_file = None
Image.init()
try:
self.command_libreoffice = sh.Command(libreoffice_path).bake(
'--headless', '--convert-to', 'pdf:writer_pdf_Export'
)
except sh.CommandNotFound:
self.command_libreoffice = None
def convert(self, page_number=DEFAULT_PAGE_NUMBER):
self.page_number = page_number
def detect_orientation(self, page_number):
# Must be overrided by subclass
pass
def get_page(self, output_format=None):
output_format = output_format or yaml.load(
stream=setting_graphics_backend_config.value, Loader=SafeLoader
).get(
'pillow_format', DEFAULT_PILLOW_FORMAT
)
if not self.image:
self.seek_page(page_number=0)
image_buffer = BytesIO()
new_mode = self.image.mode
if output_format.upper() == 'JPEG':
# JPEG doesn't support transparency channel, convert the image to
# RGB. Removes modes: P and RGBA
new_mode = 'RGB'
self.image.convert(new_mode).save(image_buffer, format=output_format)
image_buffer.seek(0)
return image_buffer
def get_page_count(self):
try:
self.soffice_file = self.to_pdf()
except InvalidOfficeFormat as exception:
logger.debug('Is not an office format document; %s', exception)
def seek_page(self, page_number):
"""
Seek the specified page number from the source file object.
If the file is a paged image get the page if not convert it to a
paged image format and return the specified page as an image.
"""
# Starting with #0
self.file_object.seek(0)
try:
self.image = Image.open(self.file_object)
except IOError:
# Cannot identify image file
self.image = self.convert(page_number=page_number)
else:
self.image.seek(page_number)
self.image.load()
def soffice(self):
"""
Executes LibreOffice as a subprocess
"""
if not self.command_libreoffice:
raise OfficeConversionError(
_('LibreOffice not installed or not found.')
)
with NamedTemporaryFile() as temporary_file_object:
# Copy the source file object of the converter instance to a
# named temporary file to be able to pass it to the LibreOffice
# execution.
self.file_object.seek(0)
shutil.copyfileobj(
fsrc=self.file_object, fdst=temporary_file_object
)
self.file_object.seek(0)
temporary_file_object.seek(0)
libreoffice_home_directory = mkdtemp()
args = (
temporary_file_object.name, '--outdir', setting_temporary_directory.value,
'-env:UserInstallation=file://{}'.format(
os.path.join(
libreoffice_home_directory, 'LibreOffice_Conversion'
)
),
)
kwargs = {'_env': {'HOME': libreoffice_home_directory}}
if self.mime_type == 'text/plain':
kwargs.update(
{'infilter': 'Text (encoded):UTF8,LF,,,'}
)
try:
self.command_libreoffice(*args, **kwargs)
except sh.ErrorReturnCode as exception:
temporary_file_object.close()
raise OfficeConversionError(exception)
except Exception as exception:
temporary_file_object.close()
logger.error('Exception launching Libre Office; %s', exception)
raise
finally:
fs_cleanup(libreoffice_home_directory)
# LibreOffice return a PDF file with the same name as the input
# provided but with the .pdf extension.
# Get the converted output file path out of the temporary file
# name plus the temporary directory
filename, extension = os.path.splitext(
os.path.basename(temporary_file_object.name)
)
logger.debug('filename: %s', filename)
logger.debug('extension: %s', extension)
converted_file_path = os.path.join(
setting_temporary_directory.value, os.path.extsep.join(
(filename, 'pdf')
)
)
logger.debug('converted_file_path: %s', converted_file_path)
# Don't use context manager with the NamedTemporaryFile on purpose
# so that it is deleted when the caller closes the file and not
# before.
temporary_converted_file_object = NamedTemporaryFile()
# Copy the LibreOffice output file to a new named temporary file
# and delete the converted file
with open(converted_file_path, mode='rb') as converted_file_object:
shutil.copyfileobj(
fsrc=converted_file_object, fdst=temporary_converted_file_object
)
fs_cleanup(converted_file_path)
temporary_converted_file_object.seek(0)
return temporary_converted_file_object
def to_pdf(self):
if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES:
return self.soffice()
else:
raise InvalidOfficeFormat(_('Not an office file format.'))
def transform(self, transformation):
if not self.image:
self.seek_page(page_number=0)
self.image = transformation.execute_on(image=self.image)
def transform_many(self, transformations):
if not self.image:
self.seek_page(page_number=0)
for transformation in transformations:
self.image = transformation.execute_on(image=self.image)