diff --git a/mayan/apps/converter/classes.py b/mayan/apps/converter/classes.py index a0ff3dfba3..d62e9af77c 100644 --- a/mayan/apps/converter/classes.py +++ b/mayan/apps/converter/classes.py @@ -104,13 +104,49 @@ class ConverterBase(object): self.soffice_file = None Image.init() - def to_pdf(self): - if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES: - return self.soffice() - else: - raise InvalidOfficeFormat(_('Not an office file format.')) + def convert(self, page_number=DEFAULT_PAGE_NUMBER): + self.page_number = page_number - def seek(self, page_number): + def detect_orientation(self, page_number): + # Must be overrided by subclass + pass + + def get_page(self, output_format=None): + output_format = output_format or yaml.load( + stream=setting_graphics_backend_config.value, Loader=SafeLoader + ).get( + 'pillow_format', DEFAULT_PILLOW_FORMAT + ) + + if not self.image: + self.seek_page(page_number=0) + + image_buffer = BytesIO() + new_mode = self.image.mode + + if output_format.upper() == 'JPEG': + # JPEG doesn't support transparency channel, convert the image to + # RGB. Removes modes: P and RGBA + new_mode = 'RGB' + + self.image.convert(new_mode).save(image_buffer, format=output_format) + + image_buffer.seek(0) + + return image_buffer + + def get_page_count(self): + try: + self.soffice_file = self.to_pdf() + except InvalidOfficeFormat as exception: + logger.debug('Is not an office format document; %s', exception) + + def seek_page(self, page_number): + """ + Seek the specified page number from the source file object. + If the file is a paged image get the page if not convert it to a + paged image format and return the specified page as an image. + """ # Starting with #0 self.file_object.seek(0) @@ -193,52 +229,21 @@ class ConverterBase(object): new_file_object.close() fs_cleanup(converted_output) - def get_page(self, output_format=None): - output_format = output_format or yaml.load( - stream=setting_graphics_backend_config.value, Loader=SafeLoader - ).get( - 'pillow_format', DEFAULT_PILLOW_FORMAT - ) - - if not self.image: - self.seek(0) - - image_buffer = BytesIO() - new_mode = self.image.mode - - if output_format.upper() == 'JPEG': - # JPEG doesn't support transparency channel, convert the image to - # RGB. Removes modes: P and RGBA - new_mode = 'RGB' - - self.image.convert(new_mode).save(image_buffer, format=output_format) - - image_buffer.seek(0) - - return image_buffer - - def convert(self, page_number=DEFAULT_PAGE_NUMBER): - self.page_number = page_number + def to_pdf(self): + if self.mime_type in CONVERTER_OFFICE_FILE_MIMETYPES: + return self.soffice() + else: + raise InvalidOfficeFormat(_('Not an office file format.')) def transform(self, transformation): if not self.image: - self.seek(0) + self.seek_page(page_number=0) self.image = transformation.execute_on(image=self.image) def transform_many(self, transformations): if not self.image: - self.seek(0) + self.seek_page(page_number=0) for transformation in transformations: self.image = transformation.execute_on(image=self.image) - - def get_page_count(self): - try: - self.soffice_file = self.to_pdf() - except InvalidOfficeFormat as exception: - logger.debug('Is not an office format document; %s', exception) - - def detect_orientation(self, page_number): - # Must be overrided by subclass - pass diff --git a/mayan/apps/document_parsing/parsers.py b/mayan/apps/document_parsing/parsers.py index 2ffd900a44..0e4d0cff70 100644 --- a/mayan/apps/document_parsing/parsers.py +++ b/mayan/apps/document_parsing/parsers.py @@ -77,7 +77,7 @@ class Parser(object): document_page.page_number, document_page.document_version ) - file_object = document_page.document_version.get_intermidiate_file() + file_object = document_page.document_version.get_intermediate_file() try: document_page_content, created = DocumentPageContent.objects.get_or_create( diff --git a/mayan/apps/documents/models/document_page_models.py b/mayan/apps/documents/models/document_page_models.py index 0326f43061..16b0a305d0 100644 --- a/mayan/apps/documents/models/document_page_models.py +++ b/mayan/apps/documents/models/document_page_models.py @@ -195,15 +195,15 @@ class DocumentPage(models.Model): file_object=storage_documentimagecache.open(cache_filename) ) - converter.seek(0) + converter.seek_page(page_number=0) else: logger.debug('Page cache file "%s" not found', cache_filename) try: converter = get_converter_class()( - file_object=self.document_version.get_intermidiate_file() + file_object=self.document_version.get_intermediate_file() ) - converter.seek(page_number=self.page_number - 1) + converter.seek_page(page_number=self.page_number - 1) page_image = converter.get_page() diff --git a/mayan/apps/documents/models/document_version_models.py b/mayan/apps/documents/models/document_version_models.py index 3b1ec1a921..5afda517f2 100644 --- a/mayan/apps/documents/models/document_version_models.py +++ b/mayan/apps/documents/models/document_version_models.py @@ -163,7 +163,7 @@ class DocumentVersion(models.Model): if first_page: return first_page.get_api_image_url(*args, **kwargs) - def get_intermidiate_file(self): + def get_intermediate_file(self): cache_filename = self.cache_filename logger.debug('Intermidiate filename: %s', cache_filename)