Use python-magic to determine a document's mimetype; otherwise fall back to Python's mimetypes library

This commit is contained in:
Roberto Rosario
2011-07-07 21:38:36 -04:00
parent 47de889163
commit 841b02c969
2 changed files with 38 additions and 11 deletions

View File

@@ -7,13 +7,12 @@ from django.contrib.auth.models import User
from django.contrib.contenttypes import generic from django.contrib.contenttypes import generic
from django.contrib.comments.models import Comment from django.contrib.comments.models import Comment
from python_magic import magic
from taggit.managers import TaggableManager from taggit.managers import TaggableManager
from dynamic_search.api import register from dynamic_search.api import register
from converter.api import get_page_count from converter.api import get_page_count
from converter import TRANFORMATION_CHOICES from converter import TRANFORMATION_CHOICES
from documents.utils import get_document_mimetype
from documents.conf.settings import CHECKSUM_FUNCTION from documents.conf.settings import CHECKSUM_FUNCTION
from documents.conf.settings import UUID_FUNCTION from documents.conf.settings import UUID_FUNCTION
from documents.conf.settings import STORAGE_BACKEND from documents.conf.settings import STORAGE_BACKEND
@@ -117,22 +116,15 @@ class Document(models.Model):
def update_mimetype(self, save=True): def update_mimetype(self, save=True):
""" """
Read a document's file and determine the mimetype by calling the Read a document's file and determine the mimetype by calling the
libmagic library get_mimetype wrapper
""" """
if self.exists(): if self.exists():
try: try:
source = self.open() self.file_mimetype, self.mime_encoding = get_document_mimetype(self)
mime = magic.Magic(mime=True)
self.file_mimetype = mime.from_buffer(source.read())
source.seek(0)
mime_encoding = magic.Magic(mime_encoding=True)
self.file_mime_encoding = mime_encoding.from_buffer(source.read())
except: except:
self.file_mimetype = u'' self.file_mimetype = u''
self.file_mime_encoding = u'' self.file_mime_encoding = u''
finally: finally:
if source:
source.close()
if save: if save:
self.save() self.save()

View File

@@ -2,6 +2,14 @@ import os
from common import TEMPORARY_DIRECTORY from common import TEMPORARY_DIRECTORY
try:
from python_magic import magic
USE_PYTHON_MAGIC = True
except:
import mimetypes
mimetypes.init()
USE_PYTHON_MAGIC = False
#http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python #http://stackoverflow.com/questions/123198/how-do-i-copy-a-file-in-python
def copyfile(source, dest, buffer_size=1024 * 1024): def copyfile(source, dest, buffer_size=1024 * 1024):
@@ -29,3 +37,30 @@ def copyfile(source, dest, buffer_size=1024 * 1024):
def document_save_to_temp_dir(document, filename, buffer_size=1024 * 1024): def document_save_to_temp_dir(document, filename, buffer_size=1024 * 1024):
temporary_path = os.path.join(TEMPORARY_DIRECTORY, filename) temporary_path = os.path.join(TEMPORARY_DIRECTORY, filename)
return document.save_to_file(temporary_path, buffer_size) return document.save_to_file(temporary_path, buffer_size)
def get_document_mimetype(document):
    """
    Determine a document's mimetype by calling the system's libmagic
    library via python-magic, or fall back to python's mimetypes
    library when python-magic could not be imported.

    Returns a ``(mimetype, encoding)`` tuple. Each element is an empty
    unicode string when the document's file does not exist or when the
    value cannot be guessed. Errors raised while opening or inspecting
    the file are propagated to the caller.
    """
    file_mimetype = u''
    file_mime_encoding = u''

    if USE_PYTHON_MAGIC:
        if document.exists():
            # Open outside the try block: if open() itself fails there is
            # nothing to close, and the cleanup code must not run with
            # ``source`` unbound (which would mask the real error).
            source = document.open()
            try:
                # Read once and reuse the buffer for both detections
                # instead of rewinding and reading the file a second time.
                content = source.read()
            finally:
                source.close()

            file_mimetype = magic.Magic(mime=True).from_buffer(content)
            file_mime_encoding = magic.Magic(
                mime_encoding=True
            ).from_buffer(content)
    else:
        # guess_type() works from the file name only and returns None for
        # anything it cannot guess; normalize to empty strings so both
        # code paths return the same kind of values.
        guessed_type, guessed_encoding = mimetypes.guess_type(
            document.get_fullname()
        )
        file_mimetype = guessed_type or u''
        file_mime_encoding = guessed_encoding or u''

    return file_mimetype, file_mime_encoding