Add PDF orientation detection. Closes GitLab issue #387.
Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -3,6 +3,7 @@ XX (2017-XX-XX)
|
||||
- Add Django-mathfilters.
|
||||
- Improve render of documents with no pages.
|
||||
- Add SANE scanner document source.
|
||||
- Add PDF orientation detection.
|
||||
|
||||
2.3 (2017-06-08)
|
||||
================
|
||||
|
||||
@@ -10,6 +10,7 @@ except ImportError:
|
||||
from StringIO import StringIO
|
||||
|
||||
from PIL import Image
|
||||
import PyPDF2
|
||||
from pdfminer.pdfpage import PDFPage
|
||||
import sh
|
||||
|
||||
@@ -67,6 +68,19 @@ class Python(ConverterBase):
|
||||
finally:
|
||||
fs_cleanup(input_filepath)
|
||||
|
||||
def detect_orientation(self, page_number):
    """
    Return the rotation, in degrees, recorded for a page of the file.

    Only PDF files are inspected: the page's ``/Rotate`` entry is read
    with PyPDF2. Any other MIME type, and a PDF page that carries no
    ``/Rotate`` entry, reports 0.

    ``page_number`` is decremented by one before being handed to
    PyPDF2's ``getPage``, so callers are expected to pass a 1-based
    page number.
    """
    # Use different ways depending on the file type
    if self.mime_type == 'application/pdf':
        try:
            pdf = PyPDF2.PdfFileReader(self.file_object)
            result = pdf.getPage(page_number - 1).get('/Rotate')
        finally:
            # Rewind even when parsing fails, so whoever uses the file
            # object next starts reading from the beginning.
            self.file_object.seek(0)

        # A missing /Rotate entry yields None; report the default
        # rotation instead so every code path returns a number.
        return result or 0

    # Default rotation: 0 degrees
    return 0
|
||||
|
||||
def get_page_count(self):
|
||||
super(Python, self).get_page_count()
|
||||
|
||||
|
||||
@@ -214,6 +214,10 @@ class ConverterBase(object):
|
||||
except InvalidOfficeFormat as exception:
|
||||
logger.debug('Is not an office format document; %s', exception)
|
||||
|
||||
def detect_orientation(self, page_number):
    """
    Placeholder hook meant to be overridden by converter subclasses.

    The base implementation ignores ``page_number``, does nothing and
    returns None.
    """
    return None
|
||||
|
||||
|
||||
class BaseTransformation(object):
|
||||
"""
|
||||
|
||||
@@ -102,3 +102,11 @@ class TransformationManager(models.Manager):
|
||||
return result
|
||||
else:
|
||||
return transformations
|
||||
|
||||
def add_for_model(self, obj, transformation, arguments=None):
    """
    Create a transformation entry attached to ``obj``.

    The entry is keyed by the content type and primary key of ``obj``,
    named after ``transformation.name`` and stores ``arguments`` as
    given. Nothing is returned.
    """
    fields = {
        'content_type': ContentType.objects.get_for_model(obj),
        'object_id': obj.pk,
        'name': transformation.name,
        'arguments': arguments,
    }
    self.create(**fields)
|
||||
|
||||
@@ -420,6 +420,7 @@ class DocumentVersion(models.Model):
|
||||
self.update_mimetype(save=False)
|
||||
self.save()
|
||||
self.update_page_count(save=False)
|
||||
self.fix_orientation()
|
||||
|
||||
logger.info(
|
||||
'New document version "%s" created for document: %s',
|
||||
@@ -466,6 +467,15 @@ class DocumentVersion(models.Model):
|
||||
"""
|
||||
return self.file.storage.exists(self.file.name)
|
||||
|
||||
def fix_orientation(self):
    """
    Add a rotate transformation to every page that reports a rotation.

    Each page is asked for its detected orientation. Pages returning a
    falsy value (0 or None) are left alone; the rest get a
    TransformationRotate whose ``degrees`` argument is 360 minus the
    detected value.
    """
    for document_page in self.pages.all():
        rotation = document_page.detect_orientation()

        # Nothing to compensate for on this page
        if not rotation:
            continue

        Transformation.objects.add_for_model(
            obj=document_page,
            transformation=TransformationRotate,
            arguments='{{"degrees": {}}}'.format(360 - rotation)
        )
|
||||
|
||||
def get_intermidiate_file(self):
|
||||
cache_filename = self.cache_filename
|
||||
logger.debug('Intermidiate filename: %s', cache_filename)
|
||||
@@ -693,6 +703,16 @@ class DocumentPage(models.Model):
|
||||
def document(self):
|
||||
return self.document_version.document
|
||||
|
||||
def detect_orientation(self):
    """
    Ask the converter backend for the rotation of this page.

    The file of the parent document version is opened and handed to a
    converter together with the version's MIME type; whatever the
    converter reports for this page number is returned unchanged.
    """
    version = self.document_version

    with version.open() as file_object:
        backend = converter_class(
            file_object=file_object, mime_type=version.mimetype
        )
        return backend.detect_orientation(page_number=self.page_number)
|
||||
|
||||
def generate_image(self, *args, **kwargs):
|
||||
# Convert arguments into transformations
|
||||
transformations = kwargs.get('transformations', [])
|
||||
|
||||
@@ -28,6 +28,7 @@ fusepy==2.0.4
|
||||
|
||||
pdfminer==20140328
|
||||
pycountry==1.20
|
||||
PyPDF2==1.26.0
|
||||
pyocr==0.4.5
|
||||
python-dateutil==2.5.3
|
||||
python-gnupg==0.3.9
|
||||
|
||||
Reference in New Issue
Block a user