diff --git a/HISTORY.rst b/HISTORY.rst index 1b91c2a195..bff0656eb0 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -95,6 +95,7 @@ * Move the purge permission logic to the StorePermission manager. * Remove the MIMETYPE_FILE_READ_SIZE setting. +* Use copyfileobj in the document parsers. 3.1.11 (2019-04-XX) =================== diff --git a/docs/releases/3.2.rst b/docs/releases/3.2.rst index 2262aaaeec..1d68d38f3e 100644 --- a/docs/releases/3.2.rst +++ b/docs/releases/3.2.rst @@ -127,6 +127,7 @@ Other changes * Move the purge permission logic to the StorePermission manager. * Remove the MIMETYPE_FILE_READ_SIZE setting. +* Use copyfileobj in the document parsers. Removals -------- diff --git a/mayan/apps/document_parsing/parsers.py b/mayan/apps/document_parsing/parsers.py index ea6ed8dc60..7b64f886d5 100644 --- a/mayan/apps/document_parsing/parsers.py +++ b/mayan/apps/document_parsing/parsers.py @@ -2,12 +2,13 @@ from __future__ import unicode_literals import logging import os +from shutil import copyfileobj import subprocess from django.apps import apps from django.utils.translation import ugettext_lazy as _ -from mayan.apps.storage.utils import copyfile, fs_cleanup, mkstemp +from mayan.apps.storage.utils import NamedTemporaryFile from .exceptions import ParserError from .settings import setting_pdftotext_path @@ -123,8 +124,9 @@ class PopplerParser(Parser): def execute(self, file_object, page_number): logger.debug('Parsing PDF page: %d', page_number) - destination_descriptor, temp_filepath = mkstemp() - copyfile(file_object, temp_filepath) + temporary_file_object = NamedTemporaryFile() + copyfileobj(fsrc=file_object, fdst=temporary_file_object) + temporary_file_object.seek(0) command = [] command.append(self.pdftotext_path) @@ -132,7 +134,7 @@ class PopplerParser(Parser): command.append(str(page_number)) command.append('-l') command.append(str(page_number)) - command.append(temp_filepath) + command.append(temporary_file_object.name) command.append('-') proc = subprocess.Popen( @@ -142,12 +144,12 @@ class PopplerParser(Parser): return_code = proc.wait() if return_code != 0: logger.error(proc.stderr.readline()) - fs_cleanup(temp_filepath, file_descriptor=destination_descriptor) + temporary_file_object.close() raise ParserError output = proc.stdout.read() - fs_cleanup(temp_filepath, file_descriptor=destination_descriptor) + temporary_file_object.close() if output == b'\x0c': logger.debug('Parser didn\'t return any output')