2017-07-04 04:02:29 -04:00
parent 5629033578
commit 7343223f59
7 changed files with 113 additions and 38 deletions
							
							
								
							
							
						
@@ -34,6 +34,10 @@ Other Changes
  "Tools" and "Setup" sub menus, were moved from the "Profile" menu to the
  "System" menu. The "Profile" menu has been renamed to "User".
- Usability improvements on small displays.
- Removal of the CONVERTER_LIBREOFFICE_PATH and CONVERTER_PDFTOPPM_PATH
  settings. These settings have been consolidated into
  CONVERTER_GRAPHICS_BACKEND_CONFIG.
- PDF compatibility improvements.
Removals
							
								
							
							
							
						
 
							
							
								
							
							
						
@@ -2,7 +2,6 @@ from __future__ import unicode_literals
from django.utils.translation import ugettext_lazy as _
DELETE_STALE_UPLOADS_INTERVAL = 60 * 10  # 10 minutes
MAYAN_PYPI_NAME = 'mayan-edms'
PYPI_URL = 'https://pypi.python.org/pypi'
							
								
							
							
							
						
 
							
							
							
						
@@ -0,0 +1,4 @@
from __future__ import unicode_literals
DEFAULT_PDFTOPPM_PATH = '/usr/bin/pdftoppm'
DEFAULT_PDFINFO_PATH = '/usr/bin/pdfinfo'
							
							
								
							
							
						
@@ -11,24 +11,40 @@ except ImportError:
from PIL import Image
import PyPDF2
from pdfminer.pdfpage import PDFPage
import sh
import yaml
from django.utils.encoding import force_text
from django.utils.translation import ugettext_lazy as _
from common.utils import fs_cleanup, mkstemp
from ..classes import ConverterBase
from ..exceptions import PageCountError
from ..settings import setting_pdftoppm_path
from ..settings import setting_graphics_backend_config
from .literals import DEFAULT_PDFTOPPM_PATH, DEFAULT_PDFINFO_PATH
try:
    pdftoppm = sh.Command(setting_pdftoppm_path.value)
    pdftoppm = sh.Command(
        yaml.load(setting_graphics_backend_config.value).get(
            'pdftoppm_path', DEFAULT_PDFTOPPM_PATH
        )
    )
except sh.CommandNotFound:
    pdftoppm = None
else:
    pdftoppm = pdftoppm.bake('-jpeg')
try:
    pdfinfo = sh.Command(
        yaml.load(setting_graphics_backend_config.value).get(
            'pdfinfo_path', DEFAULT_PDFINFO_PATH
        )
    )
except sh.CommandNotFound:
    pdfinfo = None
Image.init()
logger = logging.getLogger(__name__)
							
								
							
							
								
							
							
						
@@ -69,17 +85,34 @@ class Python(ConverterBase):
                fs_cleanup(input_filepath)
    def detect_orientation(self, page_number):
        # Default rotation: 0 degrees
        result = 0
        # Use different ways depending on the file type
        if self.mime_type == 'application/pdf':
            pdf = PyPDF2.PdfFileReader(self.file_object)
            result = pdf.getPage(page_number - 1).get('/Rotate')
            try:
                result = pdf.getPage(page_number - 1).get('/Rotate')
            except Exception as exception:
                self.file_object.seek(0)
                pdf = PyPDF2.PdfFileReader(self.file_object)
                if force_text(exception) == 'File has not been decrypted':
                    # File is encrypted, try to decrypt using a blank
                    # password.
                    try:
                        pdf.decrypt(password=b'')
                    except Exception as exception:
                        logger.error(
                            'Unable to detect PDF orientation; %s', exception
                        )
                else:
                    logger.error(
                        'Unable to detect PDF orientation; %s', exception
                    )
            finally:
                self.file_object.seek(0)
            self.file_object.seek(0)
            return result
        # Default rotation: 0 degrees
        return 0
        return result
    def get_page_count(self):
        super(Python, self).get_page_count()
							
							
							
								
							
						
@@ -87,20 +120,57 @@ class Python(ConverterBase):
        page_count = 1
        if self.mime_type == 'application/pdf' or self.soffice_file:
            # If file is a PDF open it with slate to determine the page count
            if self.soffice_file:
                file_object = IteratorIO(self.soffice_file).file_buffer
            else:
                file_object = self.file_object
            try:
                page_count = len(list(PDFPage.get_pages(file_object)))
                # Try PyPDF to determine the page number
                pdf_reader = PyPDF2.PdfFileReader(
                    stream=file_object, strict=False
                )
                page_count = pdf_reader.getNumPages()
            except Exception as exception:
                error_message = _(
                    'Exception determining PDF page count; %s'
                ) % exception
                logger.error(error_message)
                raise PageCountError(error_message)
                if force_text(exception) == 'File has not been decrypted':
                    # File is encrypted, try to decrypt using a blank
                    # password.
                    file_object.seek(0)
                    pdf_reader = PyPDF2.PdfFileReader(
                        stream=file_object, strict=False
                    )
                    try:
                        pdf_reader.decrypt(password=b'')
                        page_count = pdf_reader.getNumPages()
                    except Exception as exception:
                        file_object.seek(0)
                        if force_text(exception) == 'only algorithm code 1 and 2 are supported':
                            # PDF uses an unsupported encryption
                            # Try poppler-util's pdfinfo
                            process = pdfinfo('-', _in=file_object)
                            page_count = int(
                                filter(
                                    lambda line: line.startswith('Pages:'),
                                    process.stdout.split(b'\n')
                                )[0].replace('Pages:', '')
                            )
                            file_object.seek(0)
                            logger.debug(
                                'Document contains %d pages', page_count
                            )
                            return page_count
                        else:
                            error_message = _(
                                'Exception determining PDF page count; %s'
                            ) % exception
                            logger.error(error_message)
                            raise PageCountError(error_message)
                else:
                    error_message = _(
                        'Exception determining PDF page count; %s'
                    ) % exception
                    logger.error(error_message)
                    raise PageCountError(error_message)
            else:
                logger.debug('Document contains %d pages', page_count)
                return page_count
							
							
							
								
							
						
@@ -111,7 +181,7 @@ class Python(ConverterBase):
                image = Image.open(self.file_object)
            except IOError as exception:
                error_message = _(
                    'Exception determining PDF page count; %s'
                    'Exception determining page count using Pillow; %s'
                ) % exception
                logger.error(error_message)
                raise PageCountError(error_message)
							
								
							
							
							
						
 
							
							
								
							
							
						
@@ -11,6 +11,7 @@ except ImportError:
from PIL import Image, ImageFilter
import sh
import yaml
from django.utils.translation import string_concat, ugettext_lazy as _
							
							
							
								
							
						
@@ -19,15 +20,19 @@ from common.utils import fs_cleanup, mkstemp
from mimetype.api import get_mimetype
from .exceptions import InvalidOfficeFormat, OfficeConversionError
from .literals import DEFAULT_PAGE_NUMBER, DEFAULT_FILE_FORMAT
from .settings import setting_libreoffice_path
from .literals import (
    DEFAULT_LIBREOFFICE_PATH, DEFAULT_PAGE_NUMBER, DEFAULT_FILE_FORMAT
)
from .settings import setting_graphics_backend_config
CHUNK_SIZE = 1024
logger = logging.getLogger(__name__)
try:
    LIBREOFFICE = sh.Command(
        setting_libreoffice_path.value
        yaml.load(setting_graphics_backend_config.value).get(
            'libreoffice_path', DEFAULT_LIBREOFFICE_PATH
        )
    ).bake('--headless', '--convert-to', 'pdf')
except sh.CommandNotFound:
    LIBREOFFICE = None
							
								
							
							
								
							
							
						
@@ -114,11 +119,9 @@ class ConverterBase(object):
        Executes LibreOffice as a subprocess
        """
        if not os.path.exists(setting_libreoffice_path.value):
        if not LIBREOFFICE:
            raise OfficeConversionError(
                _(
                    'LibreOffice not installed or not found at path: %s'
                ) % setting_libreoffice_path.value
                _('LibreOffice not installed or not found.')
            )
        new_file_object, input_filepath = mkstemp()
							
								
							
							
								
							
							
						
@@ -471,7 +474,3 @@ BaseTransformation.register(TransformationRotate180)
BaseTransformation.register(TransformationRotate270)
BaseTransformation.register(TransformationUnsharpMask)
BaseTransformation.register(TransformationZoom)
							
							
							
						
 
							
							
								
							
							
						
@@ -4,5 +4,6 @@ DEFAULT_ZOOM_LEVEL = 100
DEFAULT_ROTATION = 0
DEFAULT_PAGE_NUMBER = 1
DEFAULT_FILE_FORMAT = 'JPEG'
DEFAULT_LIBREOFFICE_PATH = '/usr/bin/libreoffice'
DIMENSION_SEPARATOR = 'x'
							
							
							
						
 
							
							
								
							
							
						
@@ -10,12 +10,10 @@ setting_graphics_backend = namespace.add_setting(
    help_text=_('Graphics conversion backend to use.'),
    global_name='CONVERTER_GRAPHICS_BACKEND',
)
setting_libreoffice_path = namespace.add_setting(
    default='/usr/bin/libreoffice',
    global_name='CONVERTER_LIBREOFFICE_PATH',
    help_text=_('Path to the libreoffice program.'), is_path=True
)
setting_pdftoppm_path = namespace.add_setting(
    default='/usr/bin/pdftoppm', global_name='CONVERTER_PDFTOPPM_PATH',
    help_text=_('Path to the Popple program pdftoppm.'), is_path=True
setting_graphics_backend_config = namespace.add_setting(
    default='{libreoffice_path: /usr/bin/libreoffice, '
    'pdftoppm_path: /usr/bin/pdftoppm, pdfinfo_path: /usr/bin/pdfinfo}',
    help_text=_(
        'Configuration options for the graphics conversion backend.'
    ), global_name='CONVERTER_GRAPHICS_BACKEND_CONFIG',
)