Add PDF orientation detection. Closes GitLab issue #387.
Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -3,6 +3,7 @@ XX (2017-XX-XX)
|
|||||||
- Add Django-mathfilters.
|
- Add Django-mathfilters.
|
||||||
- Improve render of documents with no pages.
|
- Improve render of documents with no pages.
|
||||||
- Add SANE scanner document source.
|
- Add SANE scanner document source.
|
||||||
|
- Add PDF orientation detection.
|
||||||
|
|
||||||
2.3 (2017-06-08)
|
2.3 (2017-06-08)
|
||||||
================
|
================
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ except ImportError:
|
|||||||
from StringIO import StringIO
|
from StringIO import StringIO
|
||||||
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
import PyPDF2
|
||||||
from pdfminer.pdfpage import PDFPage
|
from pdfminer.pdfpage import PDFPage
|
||||||
import sh
|
import sh
|
||||||
|
|
||||||
@@ -67,6 +68,19 @@ class Python(ConverterBase):
|
|||||||
finally:
|
finally:
|
||||||
fs_cleanup(input_filepath)
|
fs_cleanup(input_filepath)
|
||||||
|
|
||||||
|
def detect_orientation(self, page_number):
    """
    Return the rotation, in degrees, recorded for a page of the file.

    page_number is 1-based; it is converted to the 0-based index passed
    to PyPDF2's getPage(). Only PDF files are inspected, every other
    MIME type reports the default rotation of 0 degrees.
    """
    # Use different ways depending on the file type
    if self.mime_type == 'application/pdf':
        try:
            pdf = PyPDF2.PdfFileReader(self.file_object)
            # Pages without a /Rotate entry fall back to the same default
            # of 0 degrees used for non PDF files, instead of None.
            return pdf.getPage(page_number - 1).get('/Rotate', 0)
        finally:
            # Rewind the file object even when parsing fails so that
            # later reads start from the beginning.
            self.file_object.seek(0)

    # Default rotation: 0 degrees
    return 0
|
||||||
|
|
||||||
def get_page_count(self):
|
def get_page_count(self):
|
||||||
super(Python, self).get_page_count()
|
super(Python, self).get_page_count()
|
||||||
|
|
||||||
|
|||||||
@@ -214,6 +214,10 @@ class ConverterBase(object):
|
|||||||
except InvalidOfficeFormat as exception:
|
except InvalidOfficeFormat as exception:
|
||||||
logger.debug('Is not an office format document; %s', exception)
|
logger.debug('Is not an office format document; %s', exception)
|
||||||
|
|
||||||
|
def detect_orientation(self, page_number):
    """
    Placeholder hook meant to be overridden by subclasses.

    The base implementation ignores page_number, performs no detection
    and returns None.
    """
    return None
|
||||||
|
|
||||||
|
|
||||||
class BaseTransformation(object):
|
class BaseTransformation(object):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -102,3 +102,11 @@ class TransformationManager(models.Manager):
|
|||||||
return result
|
return result
|
||||||
else:
|
else:
|
||||||
return transformations
|
return transformations
|
||||||
|
|
||||||
|
def add_for_model(self, obj, transformation, arguments=None):
    """
    Create a transformation entry attached to the given object.

    The entry is keyed by the object's content type and primary key, and
    stores the name of the transformation class together with the
    optional arguments value.
    """
    self.create(
        content_type=ContentType.objects.get_for_model(obj),
        object_id=obj.pk,
        name=transformation.name,
        arguments=arguments
    )
|
||||||
|
|||||||
@@ -420,6 +420,7 @@ class DocumentVersion(models.Model):
|
|||||||
self.update_mimetype(save=False)
|
self.update_mimetype(save=False)
|
||||||
self.save()
|
self.save()
|
||||||
self.update_page_count(save=False)
|
self.update_page_count(save=False)
|
||||||
|
self.fix_orientation()
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
'New document version "%s" created for document: %s',
|
'New document version "%s" created for document: %s',
|
||||||
@@ -466,6 +467,15 @@ class DocumentVersion(models.Model):
|
|||||||
"""
|
"""
|
||||||
return self.file.storage.exists(self.file.name)
|
return self.file.storage.exists(self.file.name)
|
||||||
|
|
||||||
|
def fix_orientation(self):
    """
    Add a rotation transformation to every page that reports a rotation.

    Each page is asked for its detected rotation; pages reporting no
    rotation (0 or None) are left untouched. For the others, the
    complementary angle is stored as the argument of a
    TransformationRotate entry.
    """
    for page in self.pages.all():
        degrees = page.detect_orientation()
        if not degrees:
            continue

        # Normalize to the 0-359 range so that values such as 360,
        # negative angles or angles above 360 do not produce a no-op or
        # an out of range rotation.
        rotation = (360 - degrees) % 360
        if rotation:
            Transformation.objects.add_for_model(
                obj=page, transformation=TransformationRotate,
                arguments='{{"degrees": {}}}'.format(rotation)
            )
|
||||||
|
|
||||||
def get_intermidiate_file(self):
|
def get_intermidiate_file(self):
|
||||||
cache_filename = self.cache_filename
|
cache_filename = self.cache_filename
|
||||||
logger.debug('Intermidiate filename: %s', cache_filename)
|
logger.debug('Intermidiate filename: %s', cache_filename)
|
||||||
@@ -693,6 +703,16 @@ class DocumentPage(models.Model):
|
|||||||
def document(self):
|
def document(self):
|
||||||
return self.document_version.document
|
return self.document_version.document
|
||||||
|
|
||||||
|
def detect_orientation(self):
    """
    Return the orientation the converter detects for this page.

    Opens the file of the page's document version, builds a converter
    for it using the version's MIME type and asks that converter for the
    orientation of this page's number.
    """
    version = self.document_version
    with version.open() as file_object:
        return converter_class(
            file_object=file_object, mime_type=version.mimetype
        ).detect_orientation(page_number=self.page_number)
|
||||||
|
|
||||||
def generate_image(self, *args, **kwargs):
|
def generate_image(self, *args, **kwargs):
|
||||||
# Convert arguments into transformations
|
# Convert arguments into transformations
|
||||||
transformations = kwargs.get('transformations', [])
|
transformations = kwargs.get('transformations', [])
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ fusepy==2.0.4
|
|||||||
|
|
||||||
pdfminer==20140328
|
pdfminer==20140328
|
||||||
pycountry==1.20
|
pycountry==1.20
|
||||||
|
PyPDF2==1.26.0
|
||||||
pyocr==0.4.5
|
pyocr==0.4.5
|
||||||
python-dateutil==2.5.3
|
python-dateutil==2.5.3
|
||||||
python-gnupg==0.3.9
|
python-gnupg==0.3.9
|
||||||
|
|||||||
1
setup.py
1
setup.py
@@ -82,6 +82,7 @@ djangorestframework==3.3.2
|
|||||||
djangorestframework-recursive==0.1.1
|
djangorestframework-recursive==0.1.1
|
||||||
fusepy==2.0.4
|
fusepy==2.0.4
|
||||||
pdfminer==20140328
|
pdfminer==20140328
|
||||||
|
PyPDF2==1.26.0
|
||||||
pycountry==1.20
|
pycountry==1.20
|
||||||
pyocr==0.4.5
|
pyocr==0.4.5
|
||||||
python-dateutil==2.5.3
|
python-dateutil==2.5.3
|
||||||
|
|||||||
Reference in New Issue
Block a user