Add support for getting an office document page count.

This commit is contained in:
Roberto Rosario
2015-07-03 03:19:42 -04:00
parent bee9ae32c3
commit 3d7e7ae4a2
2 changed files with 25 additions and 5 deletions

View File

@@ -30,6 +30,16 @@ Image.init()
logger = logging.getLogger(__name__)
class IteratorIO(object):
    """
    Collect every chunk produced by an iterator into an in-memory buffer.

    The resulting ``StringIO`` object is exposed as ``file_buffer`` and is
    rewound to position 0 so it can be read like a freshly opened file.
    """

    def __init__(self, iterator):
        # Drain the whole iterator up front; ``writelines`` writes each
        # chunk as-is (no separators are added between chunks).
        buffer = StringIO()
        buffer.writelines(iterator)
        # Rewind so consumers start reading from the first byte.
        buffer.seek(0)
        self.file_buffer = buffer
class Python(ConverterBase):
def convert(self, *args, **kwargs):
@@ -53,19 +63,26 @@ class Python(ConverterBase):
fs_cleanup(input_filepath)
def get_page_count(self):
super(Python, self).get_page_count()
page_count = 1
if self.mime_type == 'application/pdf':
if self.mime_type == 'application/pdf' or self.soffice_file:
# If file is a PDF open it with slate to determine the page count
if self.soffice_file:
file_object = IteratorIO(self.soffice_file).file_buffer
else:
file_object = self.file_object
try:
pages = slate.PDF(self.file_object)
pages = slate.PDF(file_object)
except Exception as exception:
logger.error('Slate exception; %s', exception)
raise
else:
return len(pages)
finally:
self.file_object.seek(0)
file_object.seek(0)
else:
try:
image = Image.open(self.file_object)

View File

@@ -135,7 +135,7 @@ class ConverterBase(object):
self.file_object = file_object
self.image = None
self.mime_type = mime_type or get_mimetype(file_object=file_object, mimetype_only=False)[0]
self.soffice_file_object = None
self.soffice_file = None
def to_pdf(self):
if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES:
@@ -183,7 +183,10 @@ class ConverterBase(object):
self.image = transformation.execute_on(self.image)
def get_page_count(self):
    """
    Prepare the converter for a page count by attempting a PDF conversion.

    Calls ``to_pdf()`` and stores its result in ``self.soffice_file`` so
    that subclasses, which call this method through ``super()`` and then
    inspect ``self.soffice_file``, can count the pages of the converted
    document. This base implementation does not return a count itself.

    The previous unconditional ``raise NotImplementedError`` is removed:
    it made the conversion attempt below unreachable and would break any
    subclass that chains up to this method.
    """
    try:
        self.soffice_file = self.to_pdf()
    except InvalidOfficeFormat:
        # Deliberate best-effort: presumably raised by ``to_pdf()`` when the
        # file is not an office document (confirm against ``to_pdf``). In
        # that case ``soffice_file`` keeps its prior value and the subclass
        # falls back to its own page counting.
        pass
class BaseTransformation(object):