diff --git a/docs/releases/2.5.rst b/docs/releases/2.5.rst index 3001746eb4..b6a37a76ca 100644 --- a/docs/releases/2.5.rst +++ b/docs/releases/2.5.rst @@ -34,6 +34,10 @@ Other Changes "Tools" and "Setup" sub menus, were moved from the "Profile" menu to the "System" menu. The "Profile" menu has been renamed to "User". - Usability improvements on small displays. +- Removal of the CONVERTER_LIBREOFFICE_PATH and CONVERTER_PDFTOPPM_PATH + settings. These settings have been consolidated into + CONVERTER_GRAPHICS_BACKEND_CONFIG. +- PDF compatibility improvements. Removals diff --git a/mayan/apps/common/literals.py b/mayan/apps/common/literals.py index af4bd308ad..c927976400 100644 --- a/mayan/apps/common/literals.py +++ b/mayan/apps/common/literals.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from django.utils.translation import ugettext_lazy as _ - DELETE_STALE_UPLOADS_INTERVAL = 60 * 10 # 10 minutes MAYAN_PYPI_NAME = 'mayan-edms' PYPI_URL = 'https://pypi.python.org/pypi' diff --git a/mayan/apps/converter/backends/literals.py b/mayan/apps/converter/backends/literals.py new file mode 100644 index 0000000000..7442f70b1d --- /dev/null +++ b/mayan/apps/converter/backends/literals.py @@ -0,0 +1,4 @@ +from __future__ import unicode_literals + +DEFAULT_PDFTOPPM_PATH = '/usr/bin/pdftoppm' +DEFAULT_PDFINFO_PATH = '/usr/bin/pdfinfo' diff --git a/mayan/apps/converter/backends/python.py b/mayan/apps/converter/backends/python.py index 381e2e81e0..5247c8af1c 100644 --- a/mayan/apps/converter/backends/python.py +++ b/mayan/apps/converter/backends/python.py @@ -11,24 +11,40 @@ except ImportError: from PIL import Image import PyPDF2 -from pdfminer.pdfpage import PDFPage import sh +import yaml +from django.utils.encoding import force_text from django.utils.translation import ugettext_lazy as _ from common.utils import fs_cleanup, mkstemp from ..classes import ConverterBase from ..exceptions import PageCountError -from ..settings 
import setting_graphics_backend_config + +from .literals import DEFAULT_PDFTOPPM_PATH, DEFAULT_PDFINFO_PATH try: - pdftoppm = sh.Command(setting_pdftoppm_path.value) + pdftoppm = sh.Command( + yaml.load(setting_graphics_backend_config.value).get( + 'pdftoppm_path', DEFAULT_PDFTOPPM_PATH + ) + ) except sh.CommandNotFound: pdftoppm = None else: pdftoppm = pdftoppm.bake('-jpeg') +try: + pdfinfo = sh.Command( + yaml.load(setting_graphics_backend_config.value).get( + 'pdfinfo_path', DEFAULT_PDFINFO_PATH + ) + ) +except sh.CommandNotFound: + pdfinfo = None + Image.init() logger = logging.getLogger(__name__) @@ -69,17 +85,34 @@ class Python(ConverterBase): fs_cleanup(input_filepath) def detect_orientation(self, page_number): + # Default rotation: 0 degrees + result = 0 + # Use different ways depending on the file type if self.mime_type == 'application/pdf': pdf = PyPDF2.PdfFileReader(self.file_object) - result = pdf.getPage(page_number - 1).get('/Rotate') + try: + result = pdf.getPage(page_number - 1).get('/Rotate') + except Exception as exception: + self.file_object.seek(0) + pdf = PyPDF2.PdfFileReader(self.file_object) + if force_text(exception) == 'File has not been decrypted': + # File is encrypted, try to decrypt using a blank + # password. 
+ try: + pdf.decrypt(password=b'') + except Exception as exception: + logger.error( + 'Unable to detect PDF orientation; %s', exception + ) + else: + logger.error( + 'Unable to detect PDF orientation; %s', exception + ) + finally: + self.file_object.seek(0) - self.file_object.seek(0) - - return result - - # Default rotation: 0 degrees - return 0 + return result def get_page_count(self): super(Python, self).get_page_count() @@ -87,20 +120,57 @@ class Python(ConverterBase): page_count = 1 if self.mime_type == 'application/pdf' or self.soffice_file: - # If file is a PDF open it with slate to determine the page count if self.soffice_file: file_object = IteratorIO(self.soffice_file).file_buffer else: file_object = self.file_object try: - page_count = len(list(PDFPage.get_pages(file_object))) + # Try PyPDF to determine the page number + pdf_reader = PyPDF2.PdfFileReader( + stream=file_object, strict=False + ) + page_count = pdf_reader.getNumPages() except Exception as exception: - error_message = _( - 'Exception determining PDF page count; %s' - ) % exception - logger.error(error_message) - raise PageCountError(error_message) + if force_text(exception) == 'File has not been decrypted': + # File is encrypted, try to decrypt using a blank + # password. 
+ file_object.seek(0) + pdf_reader = PyPDF2.PdfFileReader( + stream=file_object, strict=False + ) + try: + pdf_reader.decrypt(password=b'') + page_count = pdf_reader.getNumPages() + except Exception as exception: + file_object.seek(0) + if force_text(exception) == 'only algorithm code 1 and 2 are supported': + # PDF uses an unsupported encryption + # Try poppler-util's pdfinfo + process = pdfinfo('-', _in=file_object) + page_count = int( + filter( + lambda line: line.startswith('Pages:'), + process.stdout.split(b'\n') + )[0].replace('Pages:', '') + ) + file_object.seek(0) + logger.debug( + 'Document contains %d pages', page_count + ) + return page_count + else: + error_message = _( + 'Exception determining PDF page count; %s' + ) % exception + logger.error(error_message) + raise PageCountError(error_message) + else: + error_message = _( + 'Exception determining PDF page count; %s' + ) % exception + logger.error(error_message) + raise PageCountError(error_message) else: logger.debug('Document contains %d pages', page_count) return page_count @@ -111,7 +181,7 @@ class Python(ConverterBase): image = Image.open(self.file_object) except IOError as exception: error_message = _( - 'Exception determining PDF page count; %s' + 'Exception determining page count using Pillow; %s' ) % exception logger.error(error_message) raise PageCountError(error_message) diff --git a/mayan/apps/converter/classes.py b/mayan/apps/converter/classes.py index ab2e5be417..e02bcf2440 100644 --- a/mayan/apps/converter/classes.py +++ b/mayan/apps/converter/classes.py @@ -11,6 +11,7 @@ except ImportError: from PIL import Image, ImageFilter import sh +import yaml from django.utils.translation import string_concat, ugettext_lazy as _ @@ -19,15 +20,19 @@ from common.utils import fs_cleanup, mkstemp from mimetype.api import get_mimetype from .exceptions import InvalidOfficeFormat, OfficeConversionError -from .literals import DEFAULT_PAGE_NUMBER, DEFAULT_FILE_FORMAT -from .settings import 
setting_libreoffice_path +from .literals import ( + DEFAULT_LIBREOFFICE_PATH, DEFAULT_PAGE_NUMBER, DEFAULT_FILE_FORMAT +) +from .settings import setting_graphics_backend_config CHUNK_SIZE = 1024 logger = logging.getLogger(__name__) try: LIBREOFFICE = sh.Command( - setting_libreoffice_path.value + yaml.load(setting_graphics_backend_config.value).get( + 'libreoffice_path', DEFAULT_LIBREOFFICE_PATH + ) ).bake('--headless', '--convert-to', 'pdf') except sh.CommandNotFound: LIBREOFFICE = None @@ -114,11 +119,9 @@ class ConverterBase(object): Executes LibreOffice as a subprocess """ - if not os.path.exists(setting_libreoffice_path.value): + if not LIBREOFFICE: raise OfficeConversionError( - _( - 'LibreOffice not installed or not found at path: %s' - ) % setting_libreoffice_path.value + _('LibreOffice not installed or not found.') ) new_file_object, input_filepath = mkstemp() @@ -471,7 +474,3 @@ BaseTransformation.register(TransformationRotate180) BaseTransformation.register(TransformationRotate270) BaseTransformation.register(TransformationUnsharpMask) BaseTransformation.register(TransformationZoom) - - - - diff --git a/mayan/apps/converter/literals.py b/mayan/apps/converter/literals.py index e058f313bc..feb54cc369 100644 --- a/mayan/apps/converter/literals.py +++ b/mayan/apps/converter/literals.py @@ -4,5 +4,6 @@ DEFAULT_ZOOM_LEVEL = 100 DEFAULT_ROTATION = 0 DEFAULT_PAGE_NUMBER = 1 DEFAULT_FILE_FORMAT = 'JPEG' +DEFAULT_LIBREOFFICE_PATH = '/usr/bin/libreoffice' DIMENSION_SEPARATOR = 'x' diff --git a/mayan/apps/converter/settings.py b/mayan/apps/converter/settings.py index 15df41bfff..cc283b6069 100644 --- a/mayan/apps/converter/settings.py +++ b/mayan/apps/converter/settings.py @@ -10,12 +10,10 @@ setting_graphics_backend = namespace.add_setting( help_text=_('Graphics conversion backend to use.'), global_name='CONVERTER_GRAPHICS_BACKEND', ) -setting_libreoffice_path = namespace.add_setting( - default='/usr/bin/libreoffice', - global_name='CONVERTER_LIBREOFFICE_PATH', - 
help_text=_('Path to the libreoffice program.'), is_path=True -) -setting_pdftoppm_path = namespace.add_setting( - default='/usr/bin/pdftoppm', global_name='CONVERTER_PDFTOPPM_PATH', - help_text=_('Path to the Popple program pdftoppm.'), is_path=True +setting_graphics_backend_config = namespace.add_setting( + default='{libreoffice_path: /usr/bin/libreoffice, ' + 'pdftoppm_path: /usr/bin/pdftoppm, pdfinfo_path: /usr/bin/pdfinfo}', + help_text=_( + 'Configuration options for the graphics conversion backend.' + ), global_name='CONVERTER_GRAPHICS_BACKEND_CONFIG', )