Convert and cache office documents at the document version level for faster page image retrieval

2015-06-24 01:04:35 -04:00
parent 3d68e79654
commit b18888b3f7
7 changed files with 116 additions and 69 deletions
--- a/mayan/apps/converter/backends/python.py
+++ b/mayan/apps/converter/backends/python.py
@@ -39,12 +39,8 @@ class Python(ConverterBase):

            new_file_object, input_filepath = tempfile.mkstemp()

-            if self.soffice_file_object:
-                os.write(new_file_object, self.soffice_file_object.read())
-                self.soffice_file_object.close()
-            else:
-                os.write(new_file_object, self.file_object.read())
-                self.file_object.seek(0)
+            os.write(new_file_object, self.file_object.read())
+            self.file_object.seek(0)

            os.close(new_file_object)

@@ -57,6 +53,8 @@ class Python(ConverterBase):
                fs_cleanup(input_filepath)

    def get_page_count(self):
+        super(Python, self).get_page_count()
+
        page_count = 1

        if self.mime_type == 'application/pdf':
@@ -64,25 +62,24 @@ class Python(ConverterBase):
            try:
                pages = slate.PDF(self.file_object)
            except Exception as exception:
-                logger.error('slate exception; %s', exception)
-                return 1
-                # TODO: Maybe return UnknownFileFormat to display proper unknwon file format message in document description
+                logger.error('Slate exception; %s', exception)
+                raise
            else:
                return len(pages)
            finally:
                self.file_object.seek(0)
+        else:
+            try:
+                image = Image.open(self.file_object)
+            finally:
+                self.file_object.seek(0)

-        try:
-            image = Image.open(self.file_object)
-        finally:
-            self.file_object.seek(0)
+            try:
+                while True:
+                    image.seek(image.tell() + 1)
+                    page_count += 1
+            except EOFError:
+                # end of sequence
+                pass

-        try:
-            while True:
-                image.seek(image.tell() + 1)
-                page_count += 1
-        except EOFError:
-            # end of sequence
-            pass
-
-        return page_count
+            return page_count