Add PDF orientation detection. Closes GitLab issue #387.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2017-06-16 22:29:36 -04:00
parent 86a351e826
commit 4150fea2ad
7 changed files with 49 additions and 0 deletions

View File

@@ -3,6 +3,7 @@ XX (2017-XX-XX)
- Add Django-mathfilters. - Add Django-mathfilters.
- Improve render of documents with no pages. - Improve render of documents with no pages.
- Add SANE scanner document source. - Add SANE scanner document source.
- Add PDF orientation detection.
2.3 (2017-06-08) 2.3 (2017-06-08)
================ ================

View File

@@ -10,6 +10,7 @@ except ImportError:
from StringIO import StringIO from StringIO import StringIO
from PIL import Image from PIL import Image
import PyPDF2
from pdfminer.pdfpage import PDFPage from pdfminer.pdfpage import PDFPage
import sh import sh
@@ -67,6 +68,19 @@ class Python(ConverterBase):
finally: finally:
fs_cleanup(input_filepath) fs_cleanup(input_filepath)
def detect_orientation(self, page_number):
    """
    Return the rotation, in degrees, recorded for a page of the document.

    Only PDF files are inspected: the page's ``/Rotate`` entry is read and
    normalized to the 0-359 range. Any other MIME type, and any PDF page
    without a ``/Rotate`` entry, reports 0.

    ``page_number`` is 1-based; it is converted to PyPDF2's 0-based index.
    """
    # Use different ways depending on the file type
    if self.mime_type == 'application/pdf':
        try:
            pdf = PyPDF2.PdfFileReader(self.file_object)
            page = pdf.getPage(page_number - 1)
            # Subscript access rather than .get().
            # NOTE(review): PyPDF2 appears to resolve indirect references
            # only on subscript access - confirm against the pinned version.
            rotation = page['/Rotate'] if '/Rotate' in page else 0
        finally:
            # Rewind even when parsing fails so that later consumers of the
            # same file object start reading from the beginning.
            self.file_object.seek(0)

        # Fold negative or >= 360 values into the 0-359 range.
        return int(rotation) % 360

    # Default rotation: 0 degrees
    return 0
def get_page_count(self): def get_page_count(self):
super(Python, self).get_page_count() super(Python, self).get_page_count()

View File

@@ -214,6 +214,10 @@ class ConverterBase(object):
except InvalidOfficeFormat as exception: except InvalidOfficeFormat as exception:
logger.debug('Is not an office format document; %s', exception) logger.debug('Is not an office format document; %s', exception)
def detect_orientation(self, page_number):
    """
    Return the rotation of the given page, in degrees.

    This base implementation performs no detection and always reports 0
    (no rotation), so callers receive a number rather than None.
    """
    # Must be overridden by subclass
    return 0
class BaseTransformation(object): class BaseTransformation(object):
""" """

View File

@@ -102,3 +102,11 @@ class TransformationManager(models.Manager):
return result return result
else: else:
return transformations return transformations
def add_for_model(self, obj, transformation, arguments=None):
    """
    Create a transformation entry attached to the given model instance.

    The entry is keyed by the content type and primary key of ``obj``,
    takes its name from ``transformation.name`` and stores ``arguments``
    unchanged. Nothing is returned.
    """
    fields = {
        'content_type': ContentType.objects.get_for_model(obj),
        'object_id': obj.pk,
        'name': transformation.name,
        'arguments': arguments,
    }
    self.create(**fields)

View File

@@ -420,6 +420,7 @@ class DocumentVersion(models.Model):
self.update_mimetype(save=False) self.update_mimetype(save=False)
self.save() self.save()
self.update_page_count(save=False) self.update_page_count(save=False)
self.fix_orientation()
logger.info( logger.info(
'New document version "%s" created for document: %s', 'New document version "%s" created for document: %s',
@@ -466,6 +467,15 @@ class DocumentVersion(models.Model):
""" """
return self.file.storage.exists(self.file.name) return self.file.storage.exists(self.file.name)
def fix_orientation(self):
    """
    Add a rotation transformation to every page that reports a rotation.

    Each page of this version is asked for its detected orientation. When
    it is non-zero, a ``TransformationRotate`` entry is created for the
    page with the complementary angle (360 minus the detected value).
    """
    for page in self.pages.all():
        degrees = page.detect_orientation()
        if not degrees:
            # None or 0: nothing detected, nothing to compensate.
            continue

        # Normalize so that a detected 360 (or a negative value) does not
        # produce a no-op or out-of-range rotation argument.
        correction = (360 - degrees) % 360
        if correction:
            Transformation.objects.add_for_model(
                obj=page, transformation=TransformationRotate,
                arguments='{{"degrees": {}}}'.format(correction)
            )
def get_intermidiate_file(self): def get_intermidiate_file(self):
cache_filename = self.cache_filename cache_filename = self.cache_filename
logger.debug('Intermidiate filename: %s', cache_filename) logger.debug('Intermidiate filename: %s', cache_filename)
@@ -693,6 +703,16 @@ class DocumentPage(models.Model):
def document(self): def document(self):
return self.document_version.document return self.document_version.document
def detect_orientation(self):
    """
    Return the orientation the converter detects for this page.

    Opens the file of the page's document version, builds a converter
    for it using the version's MIME type and delegates to the converter's
    ``detect_orientation`` with this page's number.
    """
    version = self.document_version
    with version.open() as file_object:
        return converter_class(
            file_object=file_object, mime_type=version.mimetype
        ).detect_orientation(page_number=self.page_number)
def generate_image(self, *args, **kwargs): def generate_image(self, *args, **kwargs):
# Convert arguments into transformations # Convert arguments into transformations
transformations = kwargs.get('transformations', []) transformations = kwargs.get('transformations', [])

View File

@@ -28,6 +28,7 @@ fusepy==2.0.4
pdfminer==20140328 pdfminer==20140328
pycountry==1.20 pycountry==1.20
PyPDF2==1.26.0
pyocr==0.4.5 pyocr==0.4.5
python-dateutil==2.5.3 python-dateutil==2.5.3
python-gnupg==0.3.9 python-gnupg==0.3.9

View File

@@ -82,6 +82,7 @@ djangorestframework==3.3.2
djangorestframework-recursive==0.1.1 djangorestframework-recursive==0.1.1
fusepy==2.0.4 fusepy==2.0.4
pdfminer==20140328 pdfminer==20140328
PyPDF2==1.26.0
pycountry==1.20 pycountry==1.20
pyocr==0.4.5 pyocr==0.4.5
python-dateutil==2.5.3 python-dateutil==2.5.3