Add PDF orientation detection. Closes GitLab issue #387.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2017-06-16 22:29:36 -04:00
parent 86a351e826
commit 4150fea2ad
7 changed files with 49 additions and 0 deletions

View File

@@ -3,6 +3,7 @@ XX (2017-XX-XX)
- Add Django-mathfilters.
- Improve render of documents with no pages.
- Add SANE scanner document source.
- Add PDF orientation detection.
2.3 (2017-06-08)
================

View File

@@ -10,6 +10,7 @@ except ImportError:
from StringIO import StringIO
from PIL import Image
import PyPDF2
from pdfminer.pdfpage import PDFPage
import sh
@@ -67,6 +68,19 @@ class Python(ConverterBase):
finally:
fs_cleanup(input_filepath)
def detect_orientation(self, page_number):
    """
    Return the rotation, in degrees, recorded for a page of the file.

    Only PDF files are inspected: the value of the page's ``/Rotate``
    entry is returned. Every other MIME type, and PDF pages that carry
    no ``/Rotate`` entry, yield 0.

    page_number is 1-based; it is converted to the 0-based index
    passed to PyPDF2's getPage().
    """
    # Use different ways depending on the file type
    if self.mime_type == 'application/pdf':
        try:
            pdf = PyPDF2.PdfFileReader(self.file_object)
            # Fall back to 0 so callers always get a number, matching the
            # default returned for non PDF files.
            result = pdf.getPage(page_number - 1).get('/Rotate', 0)
            # Plain dict .get() does not follow indirect references, so
            # resolve them here while the stream is still positioned for
            # PyPDF2. Direct PyPDF2 objects return themselves.
            if hasattr(result, 'getObject'):
                result = result.getObject()
        finally:
            # Always rewind so later readers of the same file object start
            # from the beginning, even when PyPDF2 raised.
            self.file_object.seek(0)

        return result

    # Default rotation: 0 degrees
    return 0
def get_page_count(self):
super(Python, self).get_page_count()

View File

@@ -214,6 +214,10 @@ class ConverterBase(object):
except InvalidOfficeFormat as exception:
logger.debug('Is not an office format document; %s', exception)
def detect_orientation(self, page_number):
    """
    Placeholder for backend specific orientation detection.

    This base implementation performs no work and returns None; it must
    be overridden by subclasses.
    """
    return None
class BaseTransformation(object):
"""

View File

@@ -102,3 +102,11 @@ class TransformationManager(models.Manager):
return result
else:
return transformations
def add_for_model(self, obj, transformation, arguments=None):
    """
    Create a transformation entry attached to the given object.

    The entry stores the content type and primary key of ``obj``, the
    ``name`` attribute of ``transformation`` and the optional
    ``arguments`` value exactly as passed in. Nothing is returned.
    """
    self.create(
        content_type=ContentType.objects.get_for_model(obj),
        object_id=obj.pk,
        name=transformation.name,
        arguments=arguments
    )

View File

@@ -420,6 +420,7 @@ class DocumentVersion(models.Model):
self.update_mimetype(save=False)
self.save()
self.update_page_count(save=False)
self.fix_orientation()
logger.info(
'New document version "%s" created for document: %s',
@@ -466,6 +467,15 @@ class DocumentVersion(models.Model):
"""
return self.file.storage.exists(self.file.name)
def fix_orientation(self):
    """
    Add a rotation transformation to every page that reports a rotation.

    Each page of this version is asked for its detected orientation. When
    a non-zero value comes back, a TransformationRotate entry is added for
    the page with the complementary angle (360 minus the detected value),
    normalized to the 0-359 range. Pages whose correction works out to a
    full turn are left untouched.
    """
    for page in self.pages.all():
        degrees = page.detect_orientation()
        if not degrees:
            # None or 0: nothing to correct.
            continue

        # Normalize so that full turns collapse to 0 and negative or
        # over-range values still produce an angle between 0 and 359.
        correction = (360 - int(degrees)) % 360
        if not correction:
            # A multiple of 360 is a no-op; do not store a useless entry.
            continue

        Transformation.objects.add_for_model(
            obj=page, transformation=TransformationRotate,
            arguments='{{"degrees": {}}}'.format(correction)
        )
def get_intermidiate_file(self):
cache_filename = self.cache_filename
logger.debug('Intermidiate filename: %s', cache_filename)
@@ -693,6 +703,16 @@ class DocumentPage(models.Model):
def document(self):
return self.document_version.document
def detect_orientation(self):
    """
    Return the orientation the converter detects for this page.

    The file of the page's document version is opened, wrapped in a
    converter built with the version's MIME type, and the converter is
    asked for the orientation of this page's number.
    """
    version = self.document_version

    with version.open() as file_object:
        converter = converter_class(
            file_object=file_object, mime_type=version.mimetype
        )
        orientation = converter.detect_orientation(
            page_number=self.page_number
        )
        return orientation
def generate_image(self, *args, **kwargs):
# Convert arguments into transformations
transformations = kwargs.get('transformations', [])

View File

@@ -28,6 +28,7 @@ fusepy==2.0.4
pdfminer==20140328
pycountry==1.20
PyPDF2==1.26.0
pyocr==0.4.5
python-dateutil==2.5.3
python-gnupg==0.3.9

View File

@@ -82,6 +82,7 @@ djangorestframework==3.3.2
djangorestframework-recursive==0.1.1
fusepy==2.0.4
pdfminer==20140328
PyPDF2==1.26.0
pycountry==1.20
pyocr==0.4.5
python-dateutil==2.5.3