Use copyfileobj in the document parsers
Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -95,6 +95,7 @@
 * Move the purge permission logic to the StorePermission
 manager.
 * Remove the MIMETYPE_FILE_READ_SIZE setting.
+* Use copyfileobj in the document parsers.
 
 3.1.11 (2019-04-XX)
 ===================
@@ -127,6 +127,7 @@ Other changes
 * Move the purge permission logic to the StorePermission
 manager.
 * Remove the MIMETYPE_FILE_READ_SIZE setting.
+* Use copyfileobj in the document parsers.
 
 Removals
 --------
@@ -2,12 +2,13 @@ from __future__ import unicode_literals
 
 import logging
 import os
+from shutil import copyfileobj
 import subprocess
 
 from django.apps import apps
 from django.utils.translation import ugettext_lazy as _
 
-from mayan.apps.storage.utils import copyfile, fs_cleanup, mkstemp
+from mayan.apps.storage.utils import NamedTemporaryFile
 
 from .exceptions import ParserError
 from .settings import setting_pdftotext_path
@@ -123,8 +124,9 @@ class PopplerParser(Parser):
     def execute(self, file_object, page_number):
         logger.debug('Parsing PDF page: %d', page_number)
 
-        destination_descriptor, temp_filepath = mkstemp()
-        copyfile(file_object, temp_filepath)
+        temporary_file_object = NamedTemporaryFile()
+        copyfileobj(fsrc=file_object, fdst=temporary_file_object)
+        temporary_file_object.seek(0)
 
         command = []
         command.append(self.pdftotext_path)
@@ -132,7 +134,7 @@ class PopplerParser(Parser):
         command.append(str(page_number))
         command.append('-l')
         command.append(str(page_number))
-        command.append(temp_filepath)
+        command.append(temporary_file_object.name)
         command.append('-')
 
         proc = subprocess.Popen(
@@ -142,12 +144,12 @@ class PopplerParser(Parser):
         return_code = proc.wait()
         if return_code != 0:
             logger.error(proc.stderr.readline())
-            fs_cleanup(temp_filepath, file_descriptor=destination_descriptor)
+            temporary_file_object.close()
 
             raise ParserError
 
         output = proc.stdout.read()
-        fs_cleanup(temp_filepath, file_descriptor=destination_descriptor)
+        temporary_file_object.close()
 
         if output == b'\x0c':
             logger.debug('Parser didn\'t return any output')
Reference in New Issue
Block a user