Compare commits

...

2 Commits

Author SHA1 Message Date
Roberto Rosario
bbcf7f53fb Change PCL conversion output format to PNG. Add special case for empty pages at the end of PCL documents. 2016-02-25 02:04:47 -04:00
Roberto Rosario
7e67a2384f Add PCL detection and rendering support. 2016-02-25 01:27:43 -04:00
3 changed files with 66 additions and 2 deletions

View File

@@ -20,7 +20,7 @@ from common.utils import fs_cleanup
from ..classes import ConverterBase from ..classes import ConverterBase
from ..exceptions import PageCountError from ..exceptions import PageCountError
from ..settings import setting_pdftoppm_path from ..settings import setting_gpcl_path, setting_pdftoppm_path
try: try:
pdftoppm = sh.Command(setting_pdftoppm_path.value) pdftoppm = sh.Command(setting_pdftoppm_path.value)
@@ -29,6 +29,13 @@ except sh.CommandNotFound:
else: else:
pdftoppm = pdftoppm.bake('-png') pdftoppm = pdftoppm.bake('-png')
# Resolve the gpcl executable from the configured path. When the binary is
# missing, gpcl stays None and the PCL code paths are skipped.
try:
    gpcl = sh.Command(setting_gpcl_path.value)
except sh.CommandNotFound:
    gpcl = None

if gpcl is not None:
    # Pre-bind the flags shared by every invocation. '-sOutputFile=-' sends
    # the result to stdout so callers can capture it through `_out`.
    gpcl = gpcl.bake(
        '-dNOPAUSE',
        '-dSAFER',
        '-dBATCH',
        '-sOutputFile=-',
    )
Image.init() Image.init()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -48,6 +55,31 @@ class Python(ConverterBase):
def convert(self, *args, **kwargs): def convert(self, *args, **kwargs):
super(Python, self).convert(*args, **kwargs) super(Python, self).convert(*args, **kwargs)
if self.mime_type == 'application/x-pcl' and gpcl:
new_file_object, input_filepath = tempfile.mkstemp()
self.file_object.seek(0)
os.write(new_file_object, self.file_object.read())
self.file_object.seek(0)
os.close(new_file_object)
image_buffer = io.BytesIO()
try:
gpcl(
'-r300', '-sDEVICE=png16m',
'-dFirstPage={}'.format(self.page_number + 1),
'-dLastPage={}'.format(self.page_number + 1),
input_filepath, _out=image_buffer
)
image_buffer.seek(0)
try:
return Image.open(image_buffer)
except IOError:
# Special case for empty pages at the end of PCL
# documents
return Image.new(mode='1', size=(100,100), color=1)
finally:
fs_cleanup(input_filepath)
if self.mime_type == 'application/pdf' and pdftoppm: if self.mime_type == 'application/pdf' and pdftoppm:
new_file_object, input_filepath = tempfile.mkstemp() new_file_object, input_filepath = tempfile.mkstemp()
@@ -61,7 +93,7 @@ class Python(ConverterBase):
try: try:
pdftoppm( pdftoppm(
input_filepath, f=self.page_number + 1, input_filepath, f=self.page_number + 1,
l=self.page_number + 1, _out=image_buffer l=self.page_number + 1, _out=image_buffer,
) )
image_buffer.seek(0) image_buffer.seek(0)
return Image.open(image_buffer) return Image.open(image_buffer)
@@ -73,6 +105,25 @@ class Python(ConverterBase):
page_count = 1 page_count = 1
if self.mime_type == 'application/x-pcl' and gpcl:
    # Convert the PCL document to PDF in memory so the PDF page-count
    # logic that follows can handle it. gpcl reads from a path, so the
    # source file object is first spooled to a temporary file.
    new_file_object, input_filepath = tempfile.mkstemp()

    try:
        # os.fdopen() takes ownership of the descriptor and closes it when
        # the `with` block exits. The buffered write() also guarantees the
        # whole payload is written, which a bare os.write() does not.
        with os.fdopen(new_file_object, 'wb') as temporary_file:
            self.file_object.seek(0)
            temporary_file.write(self.file_object.read())
            self.file_object.seek(0)

        file_buffer = io.BytesIO()

        gpcl(
            '-sDEVICE=pdfwrite', input_filepath, _out=file_buffer
        )
        file_buffer.seek(0)

        # From here on, treat the document as the generated PDF.
        self.file_object = file_buffer
        self.mime_type = 'application/pdf'
    finally:
        # Runs on every exit path, including a failed spool or conversion,
        # so the temporary file never lingers on disk.
        fs_cleanup(input_filepath)
if self.mime_type == 'application/pdf' or self.soffice_file: if self.mime_type == 'application/pdf' or self.soffice_file:
# If file is a PDF open it with slate to determine the page count # If file is a PDF open it with slate to determine the page count
if self.soffice_file: if self.soffice_file:

View File

@@ -19,3 +19,7 @@ setting_pdftoppm_path = namespace.add_setting(
default='/usr/bin/pdftoppm', global_name='CONVERTER_PDFTOPPM_PATH', default='/usr/bin/pdftoppm', global_name='CONVERTER_PDFTOPPM_PATH',
help_text=_('Path to the Popple program pdftoppm.'), is_path=True help_text=_('Path to the Popple program pdftoppm.'), is_path=True
) )
# Filesystem location of the gpcl binary used for PCL rendering; overridable
# through the CONVERTER_GPCL_PATH global setting.
setting_gpcl_path = namespace.add_setting(
    default='/usr/bin/gpcl',
    global_name='CONVERTER_GPCL_PATH',
    help_text=_('Path to the Ghostscript program gpcl.'),
    is_path=True
)

View File

@@ -22,4 +22,13 @@ def get_mimetype(file_object, mimetype_only=False):
file_mime_encoding = mime_encoding.from_buffer(file_object.read()) file_mime_encoding = mime_encoding.from_buffer(file_object.read())
file_object.seek(0) file_object.seek(0)
# Special case for PCL files: they are reported under generic types, so sniff
# the first two bytes for a PCL escape sequence.
if file_mimetype in ('application/octet-stream', 'text/plain'):
    signature = file_object.read(2)
    file_object.seek(0)
    # A PCL command starts with ESC (0x1b) followed by:
    #   Two-Character Escape Sequences ASCII 48-126
    #   Parameterized Escape Sequences ASCII 33-47
    # Slices are used instead of indexes so that files shorter than two
    # bytes do not raise IndexError, and so the comparison and ord() behave
    # the same on Python 2 str and Python 3 bytes.
    if (
        len(signature) == 2 and
        signature[0:1] == b'\x1b' and
        33 <= ord(signature[1:2]) <= 126
    ):
        file_mimetype = 'application/x-pcl'
return file_mimetype, file_mime_encoding return file_mimetype, file_mime_encoding