From 83ed62b2100e9a31e1d28ae56ffdb6ec79a5c4e0 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 24 Jun 2015 01:12:54 -0400 Subject: [PATCH] Remove office document parser now that document version are converted to intermediate format --- mayan/apps/ocr/parsers/__init__.py | 32 ------------------------------ 1 file changed, 32 deletions(-) diff --git a/mayan/apps/ocr/parsers/__init__.py b/mayan/apps/ocr/parsers/__init__.py index 508b1eee99..e59572b19d 100644 --- a/mayan/apps/ocr/parsers/__init__.py +++ b/mayan/apps/ocr/parsers/__init__.py @@ -8,10 +8,6 @@ import tempfile from common.settings import setting_temporary_directory from common.utils import copyfile -from converter.exceptions import OfficeConversionError -from converter.classes import ( - CONVERTER_OFFICE_FILE_MIMETYPES -) from ..settings import setting_pdftotext_path @@ -94,33 +90,6 @@ class SlateParser(Parser): document_page.save() -class OfficeParser(Parser): - """ - Parser for office document formats - """ - def parse(self, document_page, descriptor=None): - logger.debug('executing') - try: - office_converter = OfficeConverter() - document_file = document_page.document.document_save_to_temp_dir(document_page.document.checksum) - logger.debug('document_file: %s', document_file) - - office_converter.convert(document_file, mimetype=document_page.document.file_mimetype) - if office_converter.exists: - input_filepath = office_converter.output_filepath - logger.debug('office_converter.output_filepath: %s', input_filepath) - - # Now that the office document has been converted to PDF - # call the coresponding PDF parser in this new file - parse_document_page(document_page, descriptor=open(input_filepath), mimetype='application/pdf') - else: - raise ParserError - - except OfficeConversionError as exception: - logger.error(exception) - raise ParserError - - class PopplerParser(Parser): """ PDF parser using the pdftotext execute from the poppler package @@ -171,4 +140,3 @@ class PopplerParser(Parser): register_parser(mimetypes=['application/pdf'], parsers=[PopplerParser, SlateParser]) -# register_parser(mimetypes=office_converter.CONVERTER_OFFICE_FILE_MIMETYPES, parsers=[OfficeParser]) # TODO: FIX