Email source code cleanups. Use the header variable returned from flanker to extract the sender and the subject. Remove unsed scanimage usage via sh.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2018-04-10 01:52:06 -04:00
parent 07444f0688
commit 3618778737
4 changed files with 11 additions and 35 deletions

View File

@@ -1,7 +1,6 @@
from __future__ import unicode_literals
import base64
from io import BytesIO
import os
import time
import urllib

View File

@@ -1,7 +1,5 @@
from __future__ import unicode_literals
from email import message_from_string
from email.header import decode_header
import imaplib
import json
import logging
@@ -10,14 +8,8 @@ import poplib
import subprocess
from flanker import mime
import sh
import yaml
try:
scanimage = sh.Command('/usr/bin/scanimage')
except sh.CommandNotFound:
scanimage = None
from django.core.exceptions import ValidationError
from django.core.files import File
from django.core.files.base import ContentFile
@@ -30,7 +22,6 @@ from django.utils.translation import ugettext_lazy as _
from model_utils.managers import InheritanceManager
from common.compat import collapse_rfc2231_value
from common.compressed_files import CompressedFile, NotACompressedFile
from common.utils import TemporaryFile
from converter.models import Transformation
@@ -560,37 +551,25 @@ class EmailBaseModel(IntervalBaseModel):
}
)
# From: http://bookmarks.honewatson.com/2009/08/11/
# python-gmail-imaplib-search-subject-get-attachments/
# TODO: Add lock to avoid running more than once concurrent same document
# download
# TODO: Use message ID for lock
@staticmethod
def getheader(header_text, default='ascii'):
headers = decode_header(header_text)
header_sections = [
force_text(text, charset or default) for text, charset in headers
]
return ''.join(header_sections)
@staticmethod
def process_message(source, message_text):
counter = 1
message = mime.from_string(force_str(message_text))
metadata_dictionary = {}
if source.subject_metadata_type:
metadata_dictionary[
source.subject_metadata_type.name
] = EmailBaseModel.getheader(email['Subject'])
] = message.headers.get('Subjet')
if source.from_metadata_type:
metadata_dictionary[
source.from_metadata_type.name
] = EmailBaseModel.getheader(email['From'])
counter = 1
metadata_dictionary = {}
message = mime.from_string(force_str(message_text))
] = message.headers.get('From')
# Messages are tree based, do nested processing of message parts until
# a message with no children is found, then work out way up.
@@ -601,8 +580,9 @@ class EmailBaseModel(IntervalBaseModel):
# Treat inlines as attachments, both are extracted and saved as
# documents
if message.is_attachment() or message.is_inline():
with ContentFile(content=message.body, name=message.detected_file_name) as file_object:
if message.detected_file_name == source.metadata_attachment_name:
label = message.detected_file_name or 'attachment-{}'.format(counter)
with ContentFile(content=message.body, name=label) as file_object:
if label == source.metadata_attachment_name:
metadata_dictionary = yaml.safe_load(
file_object.read()
)
@@ -612,8 +592,7 @@ class EmailBaseModel(IntervalBaseModel):
else:
document = source.handle_upload(
document_type=source.document_type,
file_object=file_object, label=message.detected_file_name,
expand=(
file_object=file_object, expand=(
source.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y
)
)

View File

@@ -1,6 +1,6 @@
from __future__ import unicode_literals
TEST_EMAIL_ATTACHMENT_AND_INLINE='''Subject: Test 03: inline and attachments
TEST_EMAIL_ATTACHMENT_AND_INLINE = '''Subject: Test 03: inline and attachments
To: Renat Gilmanov
Content-Type: multipart/mixed; boundary=001a11c24d809f1525051712cc78
@@ -64,7 +64,6 @@ Content-Disposition: attachment; filename="=?UTF-8?B?QW1wZWxtw6RubmNoZW4udHh0?="
SGFsbG8gQW1wZWxtw6RubmNoZW4hCg==
--RS1tYWlsIENsaWVudA==--'''
TEST_EMAIL_NO_CONTENT_TYPE = '''MIME-Version: 1.0
Received: by 10.0.0.1 with HTTP; Mon, 9 Apr 2018 00:00:00 -0400 (AST)
X-Originating-IP: [10.0.0.1]

View File

@@ -143,7 +143,6 @@ class EmailFilenameDecodingTestCase(BaseTestCase):
host='', username='', password='', store_body=True
)
def test_decode_email_base64_encoded_filename(self):
"""
Test decoding of base64 encoded e-mail attachment filename.