diff --git a/HISTORY.rst b/HISTORY.rst index 67702d16fe..dd4f551b69 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,7 @@ XX (2017-XX-XX) - Add Django-mathfilters. - Improve render of documents with no pages. - Add SANE scanner document source. +- Add PDF orientation detection. 2.3 (2017-06-08) ================ diff --git a/mayan/apps/converter/backends/python.py b/mayan/apps/converter/backends/python.py index 5ef3b216f9..381e2e81e0 100644 --- a/mayan/apps/converter/backends/python.py +++ b/mayan/apps/converter/backends/python.py @@ -10,6 +10,7 @@ except ImportError: from StringIO import StringIO from PIL import Image +import PyPDF2 from pdfminer.pdfpage import PDFPage import sh @@ -67,6 +68,19 @@ class Python(ConverterBase): finally: fs_cleanup(input_filepath) + def detect_orientation(self, page_number): + # Use different ways depending on the file type + if self.mime_type == 'application/pdf': + pdf = PyPDF2.PdfFileReader(self.file_object) + result = pdf.getPage(page_number - 1).get('/Rotate') + + self.file_object.seek(0) + + return result + + # Default rotation: 0 degrees + return 0 + def get_page_count(self): super(Python, self).get_page_count() diff --git a/mayan/apps/converter/classes.py b/mayan/apps/converter/classes.py index 9ab180e6fe..1a5329638d 100644 --- a/mayan/apps/converter/classes.py +++ b/mayan/apps/converter/classes.py @@ -214,6 +214,10 @@ class ConverterBase(object): except InvalidOfficeFormat as exception: logger.debug('Is not an office format document; %s', exception) + def detect_orientation(self, page_number): + # Must be overridden by subclass + pass + class BaseTransformation(object): """ diff --git a/mayan/apps/converter/managers.py b/mayan/apps/converter/managers.py index 3350774c59..2792127486 100644 --- a/mayan/apps/converter/managers.py +++ b/mayan/apps/converter/managers.py @@ -102,3 +102,11 @@ class TransformationManager(models.Manager): return result else: return transformations + + def add_for_model(self, obj, transformation, 
arguments=None): + content_type = ContentType.objects.get_for_model(obj) + + self.create( + content_type=content_type, object_id=obj.pk, + name=transformation.name, arguments=arguments + ) diff --git a/mayan/apps/documents/models.py b/mayan/apps/documents/models.py index c0c02a3136..5d7228ae7d 100644 --- a/mayan/apps/documents/models.py +++ b/mayan/apps/documents/models.py @@ -420,6 +420,7 @@ class DocumentVersion(models.Model): self.update_mimetype(save=False) self.save() self.update_page_count(save=False) + self.fix_orientation() logger.info( 'New document version "%s" created for document: %s', @@ -466,6 +467,15 @@ class DocumentVersion(models.Model): """ return self.file.storage.exists(self.file.name) + def fix_orientation(self): + for page in self.pages.all(): + degrees = page.detect_orientation() + if degrees: + Transformation.objects.add_for_model( + obj=page, transformation=TransformationRotate, + arguments='{{"degrees": {}}}'.format(360-degrees) + ) + def get_intermidiate_file(self): cache_filename = self.cache_filename logger.debug('Intermidiate filename: %s', cache_filename) @@ -693,6 +703,16 @@ class DocumentPage(models.Model): def document(self): return self.document_version.document + def detect_orientation(self): + with self.document_version.open() as file_object: + converter = converter_class( + file_object=file_object, + mime_type=self.document_version.mimetype + ) + return converter.detect_orientation( + page_number=self.page_number + ) + def generate_image(self, *args, **kwargs): # Convert arguments into transformations transformations = kwargs.get('transformations', []) diff --git a/requirements/base.txt b/requirements/base.txt index 5b7ff5beee..88b755e5ef 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -28,6 +28,7 @@ fusepy==2.0.4 pdfminer==20140328 pycountry==1.20 +PyPDF2==1.26.0 pyocr==0.4.5 python-dateutil==2.5.3 python-gnupg==0.3.9 diff --git a/setup.py b/setup.py index d16d380c8c..0c4722fb2b 100644 --- a/setup.py +++ 
b/setup.py @@ -82,6 +82,7 @@ djangorestframework==3.3.2 djangorestframework-recursive==0.1.1 fusepy==2.0.4 pdfminer==20140328 +PyPDF2==1.26.0 pycountry==1.20 pyocr==0.4.5 python-dateutil==2.5.3