Files
mayan-edms/mayan/apps/converter/backends/python.py

109 lines
2.9 KiB
Python

from __future__ import unicode_literals
import io
import logging
import os
import tempfile
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from PIL import Image
from pdfminer.pdfpage import PDFPage
import sh
from django.utils.translation import ugettext_lazy as _
from common.utils import fs_cleanup
from ..classes import ConverterBase
from ..exceptions import PageCountError
from ..settings import setting_pdftoppm_path
try:
pdftoppm = sh.Command(setting_pdftoppm_path.value)
except sh.CommandNotFound:
pdftoppm = None
else:
pdftoppm = pdftoppm.bake('-png')
Image.init()
logger = logging.getLogger(__name__)
class IteratorIO(object):
def __init__(self, iterator):
self.file_buffer = StringIO()
for chunk in iterator:
self.file_buffer.write(chunk)
self.file_buffer.seek(0)
class Python(ConverterBase):
def convert(self, *args, **kwargs):
super(Python, self).convert(*args, **kwargs)
if self.mime_type == 'application/pdf' and pdftoppm:
new_file_object, input_filepath = tempfile.mkstemp()
os.write(new_file_object, self.file_object.read())
self.file_object.seek(0)
os.close(new_file_object)
image_buffer = io.BytesIO()
try:
pdftoppm(
input_filepath, f=self.page_number + 1,
l=self.page_number + 1, _out=image_buffer
)
image_buffer.seek(0)
return Image.open(image_buffer)
finally:
fs_cleanup(input_filepath)
def get_page_count(self):
super(Python, self).get_page_count()
page_count = 1
if self.mime_type == 'application/pdf' or self.soffice_file:
# If file is a PDF open it with slate to determine the page count
if self.soffice_file:
file_object = IteratorIO(self.soffice_file).file_buffer
else:
file_object = self.file_object
try:
page_count = len(list(PDFPage.get_pages(file_object)))
except Exception as exception:
error_message = _('Exception determining PDF page count; %s') % exception
logger.error(error_message)
raise PageCountError(error_message)
else:
logger.debug('Document contains %d pages', page_count)
return page_count
finally:
file_object.seek(0)
else:
try:
image = Image.open(self.file_object)
finally:
self.file_object.seek(0)
try:
while True:
image.seek(image.tell() + 1)
page_count += 1
except EOFError:
# end of sequence
pass
return page_count