File metadata: Add file metadata app

Add the file metadata app. This app uses binary wrappers called drivers
to extract properties from the files of documents. The default driver
uses exiftool to extract the EXIF record from JPEG images.
exiftool can also extract some properties from other files such as
PDFs, office files and sound files.

Signed-off-by: Roberto Rosario <Roberto.Rosario@mayan-edms.com>
This commit is contained in:
Roberto Rosario
2018-12-14 02:16:38 -04:00
parent 1efec6bd41
commit 0a7908baca
33 changed files with 1309 additions and 22 deletions

View File

@@ -16,9 +16,10 @@ __all__ = (
'TEST_NON_ASCII_COMPRESSED_DOCUMENT_FILENAME',
'TEST_NON_ASCII_COMPRESSED_DOCUMENT_PATH',
'TEST_NON_ASCII_DOCUMENT_FILENAME', 'TEST_NON_ASCII_DOCUMENT_PATH',
'TEST_PDF_INDIRECT_ROTATE_LABEL', 'TEST_PDF_INDIRECT_ROTATE_PATH',
'TEST_SMALL_DOCUMENT_CHECKSUM', 'TEST_SMALL_DOCUMENT_FILENAME',
'TEST_SMALL_DOCUMENT_PATH', 'TEST_DOCUMENT_VERSION_COMMENT_EDITED',
'TEST_PDF_DOCUMENT_FILENAME', 'TEST_PDF_INDIRECT_ROTATE_LABEL',
'TEST_PDF_INDIRECT_ROTATE_PATH', 'TEST_SMALL_DOCUMENT_CHECKSUM',
'TEST_SMALL_DOCUMENT_FILENAME', 'TEST_SMALL_DOCUMENT_PATH',
'TEST_DOCUMENT_VERSION_COMMENT_EDITED',
)
# Filenames
@@ -39,6 +40,7 @@ TEST_MULTI_PAGE_TIFF = 'multi_page.tiff'
TEST_NON_ASCII_COMPRESSED_DOCUMENT_FILENAME = 'I18N_title_áéíóúüñÑ.png.zip'
TEST_NON_ASCII_DOCUMENT_FILENAME = 'I18N_title_áéíóúüñÑ.png'
TEST_OFFICE_DOCUMENT = 'simple_2_page_document.doc'
TEST_PDF_DOCUMENT_FILENAME = 'mayan_11_1.pdf'
TEST_PDF_INDIRECT_ROTATE_LABEL = 'indirect_rotate.pdf'
TEST_SMALL_DOCUMENT_FILENAME = 'title_page.png'
TEST_SMALL_DOCUMENT_CHECKSUM = 'efa10e6cc21f83078aaa94d5cbe51de67b51af706143b\

View File

@@ -0,0 +1,3 @@
from __future__ import unicode_literals
default_app_config = 'mayan.apps.file_metadata.apps.FileMetadataApp'

View File

@@ -0,0 +1,13 @@
from __future__ import unicode_literals
from django.contrib import admin
from .models import StoredDriver
@admin.register(StoredDriver)
class StoredDriverAdmin(admin.ModelAdmin):
    """Admin configuration for the stored file metadata drivers."""
    list_display = ('internal_name', 'label', 'driver_path')

    def label(self, instance):
        """Return the label of the driver class behind the stored driver."""
        driver_label = instance.driver_label
        return driver_label

View File

@@ -0,0 +1,187 @@
from __future__ import unicode_literals
from django.apps import apps
from django.db.models.signals import post_save
from django.utils.translation import ugettext_lazy as _
from kombu import Exchange, Queue
from mayan.apps.acls import ModelPermission
from mayan.apps.common import (
MayanAppConfig, menu_facet, menu_multi_item, menu_object
)
from mayan.apps.document_indexing.handlers import handler_index_document
from mayan.apps.documents.search import document_page_search, document_search
from mayan.apps.documents.signals import post_version_upload
from mayan.apps.events import ModelEventType
from mayan.apps.navigation import SourceColumn
from mayan.celery import app
from .drivers import * # NOQA
from .events import (
event_file_metadata_document_version_finish,
event_file_metadata_document_version_submit
)
from .handlers import (
handler_initialize_new_document_type_settings,
handler_process_document_version
)
from .links import (
link_document_driver_list, link_document_file_metadata_list,
link_document_submit, link_document_submit_multiple,
link_document_type_file_metadata_settings
)
from .permissions import (
permission_document_type_file_metadata_setup,
permission_file_metadata_submit, permission_file_metadata_view
)
from .signals import post_document_version_file_metadata_processing
from .utils import (
method_document_submit, method_document_version_submit,
method_get_document_file_metadata,
method_get_document_version_file_metadata
)
class FileMetadataApp(MayanAppConfig):
    """
    App configuration of the file metadata app.

    On ``ready()`` it attaches the file metadata helper methods to the
    document models and registers the app's events, permissions, list
    columns, task queue, search fields, menu links and signal handlers.
    """
    app_namespace = 'file_metadata'
    app_url = 'file_metadata'
    has_test = True
    name = 'mayan.apps.file_metadata'
    verbose_name = _('File metadata')

    def ready(self):
        super(FileMetadataApp, self).ready()

        # Models are fetched from the app registry instead of being
        # imported at module level.
        FileMetadataEntry = self.get_model(model_name='FileMetadataEntry')
        DocumentVersionDriverEntry = self.get_model(
            model_name='DocumentVersionDriverEntry'
        )
        Document = apps.get_model(
            app_label='documents', model_name='Document'
        )
        DocumentTypeSettings = self.get_model(
            model_name='DocumentTypeSettings'
        )
        DocumentType = apps.get_model(
            app_label='documents', model_name='DocumentType'
        )
        DocumentVersion = apps.get_model(
            app_label='documents', model_name='DocumentVersion'
        )

        # Attach the submit and lookup helpers to the document and
        # document version models.
        Document.add_to_class(
            name='submit_for_file_metadata_processing',
            value=method_document_submit
        )
        Document.add_to_class(
            name='get_file_metadata',
            value=method_get_document_file_metadata
        )
        DocumentVersion.add_to_class(
            name='submit_for_file_metadata_processing',
            value=method_document_version_submit
        )
        DocumentVersion.add_to_class(
            name='get_file_metadata',
            value=method_get_document_version_file_metadata
        )

        # Events and permissions.
        ModelEventType.register(
            model=Document, event_types=(
                event_file_metadata_document_version_finish,
                event_file_metadata_document_version_submit
            )
        )
        ModelPermission.register(
            model=Document, permissions=(
                permission_file_metadata_submit, permission_file_metadata_view,
            )
        )
        ModelPermission.register(
            model=DocumentType, permissions=(
                permission_document_type_file_metadata_setup,
            )
        )
        ModelPermission.register_inheritance(
            model=DocumentTypeSettings, related='document_type',
        )

        # List view columns.
        SourceColumn(source=FileMetadataEntry, attribute='key')
        SourceColumn(source=FileMetadataEntry, attribute='value')
        SourceColumn(
            source=DocumentVersionDriverEntry, attribute='driver'
        )
        SourceColumn(
            source=DocumentVersionDriverEntry, attribute='driver__internal_name'
        )
        SourceColumn(
            source=DocumentVersionDriverEntry, attribute='get_attribute_count'
        )

        # Dedicated Celery queue and routing for the processing task.
        app.conf.task_queues.append(
            Queue(
                'file_metadata', Exchange('file_metadata'),
                routing_key='file_metadata'
            ),
        )
        app.conf.task_routes.update(
            {
                'mayan.apps.file_metadata.tasks.task_process_document_version': {
                    'queue': 'file_metadata'
                },
            }
        )

        # Search fields for file metadata keys and values, for both the
        # document and the document page search models.
        document_search.add_model_field(
            field='versions__file_metadata_drivers__entries__key',
            label=_('File metadata key')
        )
        document_search.add_model_field(
            field='versions__file_metadata_drivers__entries__value',
            label=_('File metadata value')
        )
        document_page_search.add_model_field(
            field='document_version__file_metadata_drivers__entries__key',
            label=_('File metadata key')
        )
        document_page_search.add_model_field(
            field='document_version__file_metadata_drivers__entries__value',
            label=_('File metadata value')
        )

        # Menu links.
        menu_facet.bind_links(
            links=(link_document_driver_list,), sources=(Document,)
        )
        menu_object.bind_links(
            links=(link_document_submit,), sources=(Document,)
        )
        menu_object.bind_links(
            links=(link_document_type_file_metadata_settings,),
            sources=(DocumentType,)
        )
        menu_object.bind_links(
            links=(link_document_file_metadata_list,),
            sources=(DocumentVersionDriverEntry,)
        )
        menu_multi_item.bind_links(
            links=(link_document_submit_multiple,), sources=(Document,)
        )

        # Signal handlers: create settings for new document types, process
        # uploaded versions and reindex after processing finishes.
        post_save.connect(
            dispatch_uid='file_metadata_handler_initialize_new_document_type_settings',
            receiver=handler_initialize_new_document_type_settings,
            sender=DocumentType
        )
        post_version_upload.connect(
            dispatch_uid='file_metadata_handler_process_document_version',
            receiver=handler_process_document_version, sender=DocumentVersion
        )
        post_document_version_file_metadata_processing.connect(
            dispatch_uid='file_metadata_handler_index_document',
            receiver=handler_index_document,
            sender=DocumentVersion
        )

View File

@@ -0,0 +1,80 @@
from __future__ import unicode_literals
import logging
from django.apps import apps
from .events import event_file_metadata_document_version_finish
from .exceptions import FileMetadataDriverError
from .signals import post_document_version_file_metadata_processing
logger = logging.getLogger(__name__)
class FileMetadataDriver(object):
    """
    Base class of the file metadata drivers and registry of driver classes
    by MIME type.

    Subclasses must define the ``internal_name`` attribute, which is read
    by ``process()``, and implement ``_process()``.
    """
    # Maps a MIME type to the list of driver classes registered for it.
    _registry = {}

    @classmethod
    def register(cls, mimetypes):
        """
        Register this driver class as a handler of each MIME type in
        ``mimetypes``. A class is stored only once per MIME type so that a
        repeated entry does not cause the same driver to be tried twice.
        """
        for mimetype in mimetypes:
            drivers = cls._registry.setdefault(mimetype, [])
            if cls not in drivers:
                drivers.append(cls)

    @classmethod
    def process_document_version(cls, document_version):
        """
        Run the drivers registered for the MIME type of ``document_version``
        in registration order, stopping at the first one that succeeds.

        On success the finish event is committed and the
        ``post_document_version_file_metadata_processing`` signal is sent.
        Nothing happens when no driver is registered or all of them fail.
        """
        for driver_class in cls._registry.get(document_version.mimetype, ()):
            try:
                driver = driver_class()
                driver.process(document_version=document_version)
            except FileMetadataDriverError as exception:
                # If the driver raises an error, try the next one in the
                # list, but leave a trace of the failure.
                logger.warning(
                    'Driver %s failed to process document version: %s; %s',
                    driver_class.__name__, document_version, exception
                )
            else:
                # If the driver was successful there is no need to try
                # others in the list for this mimetype
                event_file_metadata_document_version_finish.commit(
                    action_object=document_version.document,
                    target=document_version
                )
                post_document_version_file_metadata_processing.send(
                    sender=document_version.__class__,
                    instance=document_version
                )
                return

    def process(self, document_version):
        """
        Store the metadata extracted by this driver for ``document_version``.

        The stored driver record is created on first use. The previous
        entries of this driver for the document version are deleted and
        replaced by one entry per key/value pair returned by ``_process()``.
        """
        logger.info(
            'Starting processing document version: %s', document_version
        )
        StoredDriver = apps.get_model(
            app_label='file_metadata', model_name='StoredDriver'
        )
        driver_path = '.'.join([self.__module__, self.__class__.__name__])
        driver, created = StoredDriver.objects.get_or_create(
            driver_path=driver_path, defaults={
                'internal_name': self.internal_name
            }
        )
        driver.driver_entries.filter(
            document_version=document_version
        ).delete()
        document_version_driver_entry = driver.driver_entries.create(
            document_version=document_version
        )
        for key, value in self._process(document_version=document_version).items():
            document_version_driver_entry.entries.create(
                key=key, value=value
            )

    def _process(self, document_version):
        """
        Extract the metadata of ``document_version`` and return it as a
        dictionary. Must be implemented by the driver subclasses.
        """
        raise NotImplementedError(
            'Your %s class has not defined the required '
            '_process() method.' % self.__class__.__name__
        )

View File

@@ -0,0 +1 @@
from .exiftool import * # NOQA

View File

@@ -0,0 +1,70 @@
from __future__ import unicode_literals
import json
import logging
import sh
from django.utils.translation import ugettext_lazy as _
from mayan.apps.common.utils import fs_cleanup, mkstemp
from ..classes import FileMetadataDriver
from ..settings import setting_drivers_arguments
logger = logging.getLogger(__name__)
class EXIFToolDriver(FileMetadataDriver):
    """
    File metadata driver that wraps the exiftool binary, invoked with the
    ``-j`` argument so that its output can be parsed as JSON.
    """
    label = _('EXIF Tool')
    internal_name = 'exiftool'

    def __init__(self, *args, **kwargs):
        # The binary path comes from the driver arguments setting. When the
        # binary cannot be found the driver is left without a command.
        try:
            self.command_exiftool = sh.Command(
                setting_drivers_arguments.value['exif_driver']['exiftool_path']
            )
        except sh.CommandNotFound:
            self.command_exiftool = None
        else:
            self.command_exiftool = self.command_exiftool.bake('-j')

    def _process(self, document_version):
        """
        Save ``document_version`` to a temporary file, run exiftool on it
        and return the first record of the JSON output as a dictionary.

        Returns an empty dictionary when the exiftool binary was not found,
        instead of failing with a ``TypeError`` by calling ``None``.
        """
        if self.command_exiftool is None:
            logger.warning(
                'EXIFTool binary not found, not processing document '
                'version: %s', document_version
            )
            return {}

        # NOTE(review): the first value returned by mkstemp() is never
        # closed here; if it is an open file descriptor it leaks on every
        # call - confirm against mayan.apps.common.utils.mkstemp.
        new_file_object, temp_filename = mkstemp()
        try:
            document_version.save_to_file(filepath=temp_filename)
            result = self.command_exiftool(temp_filename)
            return json.loads(result.stdout)[0]
        finally:
            # Always remove the temporary copy, even if extraction fails.
            fs_cleanup(filename=temp_filename)
# MIME types handled by the EXIF tool driver. Each type is listed once;
# a repeated entry would register the driver twice for that type.
EXIFToolDriver.register(
    mimetypes=(
        'application/msword',
        'application/pdf',
        'application/vnd.oasis.opendocument.text',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        'application/x-bittorrent',
        'application/x-gzip',
        'application/x-rar-compressed',
        'application/x-shockwave-flash',
        'application/zip',
        'audio/x-pn-realaudio-plugin',
        'audio/x-wav',
        'image/jpeg',
        'image/png',
        'image/svg+xml',
        'image/tiff',
        'image/x-portable-pixmap',
        'text/html',
        'text/rtf',
        'text/x-sh',
        'video/mp4',
        'video/webm',
        'video/x-flv',
        'video/x-matroska'
    )
)

View File

@@ -0,0 +1,18 @@
from __future__ import absolute_import, unicode_literals
from django.utils.translation import ugettext_lazy as _
from mayan.apps.events import EventTypeNamespace
# Namespace grouping the event types of the file metadata app.
namespace = EventTypeNamespace(
    name='file_metadata', label=_('File metadata')
)
# Event for a document version being submitted for processing.
event_file_metadata_document_version_submit = namespace.add_event_type(
    label=_('Document version submitted for file metadata processing'),
    name='document_version_submit'
)
# Event for the processing of a document version having finished.
event_file_metadata_document_version_finish = namespace.add_event_type(
    label=_('Document version file metadata processing finished'),
    name='document_version_finish'
)

View File

@@ -0,0 +1,5 @@
from __future__ import unicode_literals
class FileMetadataDriverError(Exception):
    """
    Base file metadata driver exception. Drivers raise it to signal that
    they could not process a document version.
    """

View File

@@ -0,0 +1,21 @@
from __future__ import unicode_literals
from django.apps import apps
from .settings import setting_auto_process
def handler_initialize_new_document_type_settings(sender, instance, **kwargs):
    """
    Create the file metadata settings of a newly created document type,
    taking the initial ``auto_process`` value from the app setting.
    """
    DocumentTypeSettings = apps.get_model(
        app_label='file_metadata', model_name='DocumentTypeSettings'
    )

    # Only brand new document types need a settings record.
    if not kwargs['created']:
        return

    DocumentTypeSettings.objects.create(
        document_type=instance, auto_process=setting_auto_process.value
    )
def handler_process_document_version(sender, instance, **kwargs):
    """
    Submit the document version for file metadata processing when the
    settings of its document type have ``auto_process`` enabled.
    """
    document_type_settings = (
        instance.document.document_type.file_metadata_settings
    )
    if document_type_settings.auto_process:
        instance.submit_for_file_metadata_processing()

View File

@@ -0,0 +1,7 @@
from __future__ import absolute_import, unicode_literals
from mayan.apps.appearance.classes import Icon
# Icon used by the links and views of the file metadata app.
icon_file_metadata = Icon(
    driver_name='fontawesome', symbol='chess-board'
)

View File

@@ -0,0 +1,37 @@
from __future__ import unicode_literals
from django.utils.translation import ugettext_lazy as _
from mayan.apps.navigation import Link
from .icons import icon_file_metadata
from .permissions import (
permission_document_type_file_metadata_setup,
permission_file_metadata_submit, permission_file_metadata_view
)
# Link to the list of drivers for a document.
link_document_driver_list = Link(
    args='resolved_object.id', icon_class=icon_file_metadata,
    permissions=(permission_file_metadata_view,), text=_('File metadata'),
    view='file_metadata:document_driver_list',
)
# Link to the attributes stored for a document version driver entry.
link_document_file_metadata_list = Link(
    args=('resolved_object.id',), icon_class=icon_file_metadata,
    permissions=(permission_file_metadata_view,), text=_('Attributes'),
    view='file_metadata:document_version_driver_file_metadata_list',
)
# Link to submit a single document for file metadata processing.
link_document_submit = Link(
    args='resolved_object.id', permissions=(permission_file_metadata_submit,),
    text=_('Submit for file metadata'), view='file_metadata:document_submit'
)
# Link to submit several documents at once; it takes no URL arguments.
link_document_submit_multiple = Link(
    text=_('Submit for file metadata'),
    view='file_metadata:document_submit_multiple'
)
# Link to the file metadata settings of a document type.
link_document_type_file_metadata_settings = Link(
    args='resolved_object.id',
    icon_class=icon_file_metadata,
    permissions=(permission_document_type_file_metadata_setup,),
    text=_('Setup file metadata'),
    view='file_metadata:document_type_settings',
)

View File

@@ -0,0 +1,3 @@
from __future__ import unicode_literals
DEFAULT_EXIF_PATH = '/usr/bin/exiftool'

View File

@@ -0,0 +1,23 @@
from __future__ import unicode_literals
import logging
from django.apps import apps
from django.db import models
logger = logging.getLogger(__name__)
class DocumentTypeSettingsManager(models.Manager):
    """Manager adding natural key lookups to the document type settings."""

    def get_by_natural_key(self, document_type_natural_key):
        """
        Return the settings of the document type identified by
        ``document_type_natural_key``. Raises the ``DoesNotExist`` exception
        of the managed model when the document type is not found.
        """
        DocumentType = apps.get_model(
            app_label='documents', model_name='DocumentType'
        )
        try:
            matching_document_type = DocumentType.objects.get_by_natural_key(
                document_type_natural_key
            )
        except DocumentType.DoesNotExist:
            raise self.model.DoesNotExist
        else:
            return self.get(document_type__pk=matching_document_type.pk)

View File

@@ -0,0 +1,160 @@
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
def operation_create_setting_for_existing_document_types(apps, schema_editor):
    """
    Data migration step: create a file metadata settings record for every
    document type found in the database being migrated.
    """
    database_alias = schema_editor.connection.alias
    DocumentType = apps.get_model(
        app_label='documents', model_name='DocumentType'
    )
    DocumentTypeSettings = apps.get_model(
        app_label='file_metadata', model_name='DocumentTypeSettings'
    )

    existing_document_types = DocumentType.objects.using(database_alias).all()
    for existing_document_type in existing_document_types:
        DocumentTypeSettings.objects.using(database_alias).create(
            document_type=existing_document_type
        )
class Migration(migrations.Migration):
    """
    Initial migration of the file metadata app. Creates the four models of
    the app and then creates a settings record for each existing document
    type.
    """
    initial = True

    dependencies = [
        ('documents', '0049_auto_20181211_0011'),
    ]

    operations = [
        # Per document type settings.
        migrations.CreateModel(
            name='DocumentTypeSettings',
            fields=[
                (
                    'id', models.AutoField(
                        auto_created=True, primary_key=True, serialize=False,
                        verbose_name='ID'
                    )
                ),
                (
                    'auto_process', models.BooleanField(
                        default=True, verbose_name='Automatically queue '
                        'newly created documents for processing.'
                    )
                ),
                (
                    'document_type', models.OneToOneField(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='file_metadata_settings',
                        to='documents.DocumentType',
                        verbose_name='Document type'
                    )
                ),
            ],
            options={
                'verbose_name': 'Document type settings',
                'verbose_name_plural': 'Document types settings',
            },
        ),
        # Link between a document version and a driver. The 'driver' field
        # is added by the AddField operation below, after StoredDriver
        # exists.
        migrations.CreateModel(
            name='DocumentVersionDriverEntry',
            fields=[
                (
                    'id', models.AutoField(
                        auto_created=True, primary_key=True, serialize=False,
                        verbose_name='ID'
                    )
                ),
                (
                    'document_version', models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='file_metadata_drivers',
                        to='documents.DocumentVersion',
                        verbose_name='Document version'
                    )
                ),
            ],
            options={
                'ordering': ('document_version', 'driver'),
                'verbose_name': 'Document version driver entry',
                'verbose_name_plural': 'Document version driver entries',
            },
        ),
        # Key/value pairs extracted by a driver for a document version.
        migrations.CreateModel(
            name='FileMetadataEntry',
            fields=[
                (
                    'id', models.AutoField(
                        auto_created=True, primary_key=True, serialize=False,
                        verbose_name='ID'
                    )
                ),
                (
                    'key', models.CharField(
                        db_index=True, max_length=255, verbose_name='Key'
                    )
                ),
                (
                    'value', models.CharField(
                        db_index=True, max_length=255, verbose_name='Value'
                    )
                ),
                (
                    'document_version_driver_entry', models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='entries',
                        to='file_metadata.DocumentVersionDriverEntry',
                        verbose_name='Document version driver entry'
                    )
                ),
            ],
            options={
                'ordering': ('key', 'value'),
                'verbose_name': 'File metadata entry',
                'verbose_name_plural': 'File metadata entries',
            },
        ),
        # Database record of a driver class.
        migrations.CreateModel(
            name='StoredDriver',
            fields=[
                (
                    'id', models.AutoField(
                        auto_created=True, primary_key=True, serialize=False,
                        verbose_name='ID'
                    )
                ),
                (
                    'driver_path', models.CharField(
                        max_length=255, verbose_name='Driver path'
                    )
                ),
                (
                    'internal_name', models.CharField(
                        db_index=True, max_length=128,
                        verbose_name='Internal name'
                    )
                ),
            ],
            options={
                'ordering': ('internal_name',),
                'verbose_name': 'Driver',
                'verbose_name_plural': 'Drivers',
            },
        ),
        migrations.AddField(
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='driver_entries', to='file_metadata.StoredDriver',
                verbose_name='Driver'
            ),
            model_name='documentversiondriverentry',
            name='driver'
        ),
        # A driver can have only one entry per document version.
        migrations.AlterUniqueTogether(
            name='documentversiondriverentry',
            unique_together=set([('driver', 'document_version')]),
        ),
        # Data step: settings for the document types that already exist.
        migrations.RunPython(
            code=operation_create_setting_for_existing_document_types
        ),
    ]

View File

@@ -0,0 +1,117 @@
from __future__ import unicode_literals
import logging
from django.db import models
from django.utils.encoding import python_2_unicode_compatible
from django.utils.functional import cached_property
from django.utils.module_loading import import_string
from django.utils.translation import force_text
from django.utils.translation import ugettext_lazy as _
from mayan.apps.documents.models import DocumentType, DocumentVersion
from .managers import DocumentTypeSettingsManager
logger = logging.getLogger(__name__)
@python_2_unicode_compatible
class StoredDriver(models.Model):
    """
    Database record of a file metadata driver class, identified by the
    dotted import path of the class.
    """
    # Dotted path used to import the driver class.
    driver_path = models.CharField(
        max_length=255, verbose_name=_('Driver path')
    )
    internal_name = models.CharField(
        db_index=True, max_length=128, verbose_name=_('Internal name')
    )

    class Meta:
        ordering = ('internal_name',)
        verbose_name = _('Driver')
        verbose_name_plural = _('Drivers')

    def __str__(self):
        return force_text(self.driver_label)

    @cached_property
    def driver_class(self):
        """Import and return the driver class stored in ``driver_path``."""
        return import_string(self.driver_path)

    @cached_property
    def driver_label(self):
        """Return the ``label`` attribute of the driver class."""
        return self.driver_class.label
@python_2_unicode_compatible
class DocumentVersionDriverEntry(models.Model):
    """
    Link between a document version and a driver. The extracted key/value
    pairs are reachable through the ``entries`` related manager.
    """
    # on_delete is explicit to match the CASCADE declared in the initial
    # migration and the style used by DocumentTypeSettings.
    driver = models.ForeignKey(
        on_delete=models.CASCADE, related_name='driver_entries',
        to=StoredDriver, verbose_name=_('Driver')
    )
    document_version = models.ForeignKey(
        on_delete=models.CASCADE, related_name='file_metadata_drivers',
        to=DocumentVersion, verbose_name=_('Document version')
    )

    class Meta:
        ordering = ('document_version', 'driver')
        # A driver can have only one entry per document version.
        unique_together = ('driver', 'document_version')
        verbose_name = _('Document version driver entry')
        verbose_name_plural = _('Document version driver entries')

    def __str__(self):
        return force_text(self.driver)

    def get_attribute_count(self):
        """Return the number of file metadata entries of this driver entry."""
        return self.entries.count()
    get_attribute_count.short_description = _('Attribute count')
class DocumentTypeSettings(models.Model):
    """
    Model to store the file metadata settings for a document type.
    """
    document_type = models.OneToOneField(
        on_delete=models.CASCADE, related_name='file_metadata_settings',
        to=DocumentType, unique=True, verbose_name=_('Document type')
    )
    # Read by the version upload handler to decide whether to submit new
    # versions for processing.
    auto_process = models.BooleanField(
        default=True, verbose_name=_(
            'Automatically queue newly created documents for processing.'
        )
    )

    objects = DocumentTypeSettingsManager()

    class Meta:
        verbose_name = _('Document type settings')
        verbose_name_plural = _('Document types settings')

    def natural_key(self):
        """Return the natural key of the related document type."""
        return self.document_type.natural_key()
    natural_key.dependencies = ['documents.DocumentType']
@python_2_unicode_compatible
class FileMetadataEntry(models.Model):
    """
    A single key/value pair extracted by a driver for a document version.
    """
    # on_delete is explicit to match the CASCADE declared in the initial
    # migration and the style used by DocumentTypeSettings.
    document_version_driver_entry = models.ForeignKey(
        on_delete=models.CASCADE, related_name='entries',
        to=DocumentVersionDriverEntry,
        verbose_name=_('Document version driver entry')
    )
    key = models.CharField(
        db_index=True, max_length=255, verbose_name=_('Key')
    )
    value = models.CharField(
        db_index=True, max_length=255, verbose_name=_('Value')
    )

    class Meta:
        ordering = ('key', 'value')
        verbose_name = _('File metadata entry')
        verbose_name_plural = _('File metadata entries')

    def __str__(self):
        return '{}: {}: {}'.format(
            self.document_version_driver_entry, self.key, self.value
        )

View File

@@ -0,0 +1,20 @@
from __future__ import absolute_import, unicode_literals
from django.utils.translation import ugettext_lazy as _
from mayan.apps.permissions import PermissionNamespace
# Namespace grouping the permissions of the file metadata app.
namespace = PermissionNamespace('file_metadata', _('File metadata'))
# Permission to change the file metadata settings of a document type.
permission_document_type_file_metadata_setup = namespace.add_permission(
    name='file_metadata_document_type_setup',
    label=_('Change document type file metadata settings')
)
# Permission to submit documents for file metadata processing.
permission_file_metadata_submit = namespace.add_permission(
    name='file_metadata_submit', label=_(
        'Submit document for file metadata processing'
    )
)
# Permission to view the file metadata of documents.
permission_file_metadata_view = namespace.add_permission(
    name='file_metadata_view', label=_('View file metadata')
)

View File

@@ -0,0 +1,13 @@
from __future__ import absolute_import, unicode_literals
from django.utils.translation import ugettext_lazy as _
from mayan.apps.task_manager.classes import CeleryQueue
# Task manager entry for the 'file_metadata' queue.
queue = CeleryQueue(
    label=_('File metadata'), name='file_metadata'
)
# The task name is the dotted path of the document version processing task.
queue.add_task_type(
    label=_('Process document version'),
    name='mayan.apps.file_metadata.tasks.task_process_document_version'
)

View File

@@ -0,0 +1,23 @@
from __future__ import unicode_literals
from django.utils.translation import ugettext_lazy as _
from mayan.apps.smart_settings import Namespace
from .literals import DEFAULT_EXIF_PATH
# Namespace grouping the settings of the file metadata app.
namespace = Namespace(name='file_metadata', label=_('File metadata'))
# Per driver arguments. The 'exif_driver' entry holds the path of the
# exiftool binary read by the EXIF tool driver.
setting_drivers_arguments = namespace.add_setting(
    global_name='FILE_METADATA_DRIVERS_ARGUMENTS',
    default={'exif_driver': {'exiftool_path': DEFAULT_EXIF_PATH}}, help_text=_(
        'Arguments to pass to the drivers.'
    )
)
# Initial value of 'auto_process' for the settings of new document types.
setting_auto_process = namespace.add_setting(
    global_name='FILE_METADATA_AUTO_PROCESS', default=True,
    help_text=_(
        'Set new document types to perform file metadata processing '
        'automatically by default.'
    )
)

View File

@@ -0,0 +1,7 @@
from __future__ import unicode_literals
from django.dispatch import Signal
# Sent after a driver successfully processes a document version; the
# processed document version is passed as 'instance'.
post_document_version_file_metadata_processing = Signal(
    providing_args=('instance',), use_caching=True
)

View File

@@ -0,0 +1,24 @@
from __future__ import unicode_literals
import logging
from django.apps import apps
from mayan.celery import app
from .classes import FileMetadataDriver
logger = logging.getLogger(__name__)
@app.task(ignore_result=True)
def task_process_document_version(document_version_id):
    """
    Load the document version with the given primary key and hand it to
    the file metadata drivers registered for its MIME type.
    """
    DocumentVersion = apps.get_model(
        app_label='documents', model_name='DocumentVersion'
    )
    target_version = DocumentVersion.objects.get(pk=document_version_id)
    FileMetadataDriver.process_document_version(
        document_version=target_version
    )

View File

@@ -0,0 +1,10 @@
from __future__ import unicode_literals
TEST_DRIVER_INTERNAL_NAME = 'exiftool'
TEST_FILE_METADATA_KEY = 'FileType'
TEST_FILE_METADATA_VALUE = 'PNG'
TEST_FILE_METADATA_INDEX_NODE_TEMPLATE = "{{{{ document.get_file_metadata('{}.{}')}}}}".format(
TEST_DRIVER_INTERNAL_NAME, TEST_FILE_METADATA_KEY
)
TEST_PDF_FILE_METADATA_DOTTED_NAME = 'exiftool.Producer'
TEST_PDF_FILE_METADATA_VALUE = 'pdfTeX-1.40.10'

View File

@@ -0,0 +1,21 @@
from __future__ import unicode_literals
from mayan.apps.common.tests import BaseTestCase
from mayan.apps.documents.tests import (
TEST_PDF_DOCUMENT_FILENAME, DocumentTestMixin
)
from .literals import (
TEST_PDF_FILE_METADATA_DOTTED_NAME, TEST_PDF_FILE_METADATA_VALUE
)
class EXIFToolDriverTestCase(DocumentTestMixin, BaseTestCase):
    """Check the values extracted by the EXIF tool driver from a PDF."""
    test_document_filename = TEST_PDF_DOCUMENT_FILENAME

    def test_driver_entries(self):
        self.document.submit_for_file_metadata_processing()

        extracted_value = self.document.get_file_metadata(
            dotted_name=TEST_PDF_FILE_METADATA_DOTTED_NAME
        )

        self.assertEqual(extracted_value, TEST_PDF_FILE_METADATA_VALUE)

View File

@@ -0,0 +1,40 @@
from __future__ import unicode_literals
from actstream.models import Action
from mayan.apps.documents.tests.test_models import GenericDocumentTestCase
from ..events import (
event_file_metadata_document_version_finish,
event_file_metadata_document_version_submit
)
class FileMetadataEventsTestCase(GenericDocumentTestCase):
    """Check the events committed when a document version is submitted."""

    def _submit_latest_version(self):
        # Start from an empty action log so that only the actions of this
        # submission are inspected.
        Action.objects.all().delete()
        self.document.latest_version.submit_for_file_metadata_processing()

    def test_document_version_finish_event(self):
        self._submit_latest_version()

        # Get the most recent action
        newest_action = Action.objects.order_by('-timestamp').first()

        self.assertEqual(
            newest_action.target, self.document.latest_version
        )
        self.assertEqual(
            newest_action.verb,
            event_file_metadata_document_version_finish.id
        )

    def test_document_version_submit_event(self):
        self._submit_latest_version()

        # Get the oldest action
        oldest_action = Action.objects.order_by('-timestamp').last()

        self.assertEqual(
            oldest_action.target, self.document.latest_version
        )
        self.assertEqual(
            oldest_action.verb,
            event_file_metadata_document_version_submit.id
        )

View File

@@ -0,0 +1,32 @@
from __future__ import unicode_literals
from mayan.apps.common.tests import BaseTestCase
from mayan.apps.document_indexing.models import Index, IndexInstanceNode
from mayan.apps.document_indexing.tests.literals import TEST_INDEX_LABEL
from mayan.apps.documents.tests import DocumentTestMixin
from .literals import (
TEST_FILE_METADATA_INDEX_NODE_TEMPLATE, TEST_FILE_METADATA_VALUE
)
class IndexingTestCase(DocumentTestMixin, BaseTestCase):
    """Check that file metadata values can be used in index templates."""
    # The document is uploaded by the test itself, after the index exists.
    auto_upload_document = False

    def test_indexing(self):
        index = Index.objects.create(label=TEST_INDEX_LABEL)
        index.document_types.add(self.document_type)

        index.node_templates.create(
            parent=index.template_root,
            expression=TEST_FILE_METADATA_INDEX_NODE_TEMPLATE,
            link_documents=True
        )

        self.document = self.upload_document()
        self.document.submit_for_file_metadata_processing()

        instance_node = IndexInstanceNode.objects.get(
            value=TEST_FILE_METADATA_VALUE
        )
        self.assertTrue(self.document in instance_node.documents.all())

View File

@@ -0,0 +1,137 @@
from __future__ import unicode_literals
from django.test import override_settings
from mayan.apps.documents.tests import GenericDocumentViewTestCase
from ..permissions import (
permission_document_type_file_metadata_setup,
permission_file_metadata_submit, permission_file_metadata_view
)
from .literals import TEST_FILE_METADATA_KEY
@override_settings(FILE_METADATA_AUTO_PROCESS=True)
class FileMetadataViewsTestCase(GenericDocumentViewTestCase):
    """Permission and content checks for the file metadata views."""

    def setUp(self):
        super(FileMetadataViewsTestCase, self).setUp()
        self.login_user()

    def _delete_driver_entries(self):
        # Remove the driver entries of the latest version of the document.
        self.document.latest_version.file_metadata_drivers.all().delete()

    def _count_driver_entries(self):
        return self.document.latest_version.file_metadata_drivers.count()

    def _request_document_version_driver_list_view(self):
        return self.get(
            args=(self.document.pk,),
            viewname='file_metadata:document_driver_list',
        )

    def test_document_version_driver_list_view_no_permission(self):
        response = self._request_document_version_driver_list_view()

        self.assertEqual(response.status_code, 403)

    def test_document_version_driver_list_view_with_access(self):
        self.grant_access(
            permission=permission_file_metadata_view, obj=self.document
        )

        response = self._request_document_version_driver_list_view()

        self.assertContains(
            response=response, text=self.document.label, status_code=200
        )

    def _request_document_version_file_metadata_list_view(self):
        driver_entry = (
            self.document.latest_version.file_metadata_drivers.first()
        )
        return self.get(
            args=(driver_entry.pk,),
            viewname='file_metadata:document_version_driver_file_metadata_list',
        )

    def test_document_version_file_metadata_list_view_no_permission(self):
        response = self._request_document_version_file_metadata_list_view()

        self.assertNotContains(
            response=response, text=TEST_FILE_METADATA_KEY, status_code=403
        )

    def test_document_version_file_metadata_list_view_with_access(self):
        self.grant_access(
            permission=permission_file_metadata_view, obj=self.document
        )

        response = self._request_document_version_file_metadata_list_view()

        self.assertContains(
            response=response, text=TEST_FILE_METADATA_KEY, status_code=200
        )

    def _request_document_submit_view(self):
        return self.post(
            viewname='file_metadata:document_submit', args=(self.document.pk,)
        )

    def test_document_submit_view_no_permission(self):
        self._delete_driver_entries()

        response = self._request_document_submit_view()

        self.assertEqual(response.status_code, 302)
        self.assertEqual(self._count_driver_entries(), 0)

    def test_document_submit_view_with_access(self):
        self._delete_driver_entries()
        self.grant_access(
            permission=permission_file_metadata_submit, obj=self.document
        )

        response = self._request_document_submit_view()

        self.assertEqual(response.status_code, 302)
        self.assertEqual(self._count_driver_entries(), 1)

    def _request_multiple_document_submit_view(self):
        return self.post(
            viewname='file_metadata:document_submit_multiple',
            data={
                'id_list': self.document.pk,
            }
        )

    def test_multiple_document_submit_view_no_permission(self):
        self._delete_driver_entries()

        response = self._request_multiple_document_submit_view()

        self.assertEqual(response.status_code, 302)
        self.assertEqual(self._count_driver_entries(), 0)

    def test_multiple_document_submit_view_with_access(self):
        self._delete_driver_entries()
        self.grant_access(
            permission=permission_file_metadata_submit, obj=self.document
        )

        response = self._request_multiple_document_submit_view()

        self.assertEqual(response.status_code, 302)
        self.assertEqual(self._count_driver_entries(), 1)
class DocumentTypeViewsTestCase(GenericDocumentViewTestCase):
    """Permission checks for the document type settings view."""

    def setUp(self):
        super(DocumentTypeViewsTestCase, self).setUp()
        self.login_user()

    def _request_document_type_settings_view(self):
        document_type = self.document.document_type
        return self.get(
            viewname='file_metadata:document_type_settings',
            args=(document_type.pk,)
        )

    def test_document_type_settings_view_no_permission(self):
        response = self._request_document_type_settings_view()

        self.assertEqual(response.status_code, 403)

    def test_document_type_settings_view_with_access(self):
        self.grant_access(
            permission=permission_document_type_file_metadata_setup,
            obj=self.document.document_type
        )

        response = self._request_document_type_settings_view()

        self.assertEqual(response.status_code, 200)

View File

@@ -0,0 +1,33 @@
from __future__ import unicode_literals
from django.conf.urls import url
from .views import (
DocumentDriverListView, DocumentSubmitView, DocumentTypeSettingsEditView,
DocumentVersionDriverEntryFileMetadataListView
)
# URL patterns of the file metadata app. Views are resolved by name, so
# the path segments can change without affecting the links.
urlpatterns = [
    url(
        r'^documents/(?P<pk>\d+)/drivers/$', DocumentDriverListView.as_view(),
        name='document_driver_list'
    ),
    url(
        r'^documents/(?P<pk>\d+)/submit/$', DocumentSubmitView.as_view(),
        name='document_submit'
    ),
    # Same view as the single submit; this pattern takes no primary key.
    url(
        r'^documents/multiple/submit/$', DocumentSubmitView.as_view(),
        name='document_submit_multiple'
    ),
    # The path segment names this app instead of 'ocr'.
    url(
        r'^document_types/(?P<pk>\d+)/file_metadata/settings/$',
        DocumentTypeSettingsEditView.as_view(),
        name='document_type_settings'
    ),
    url(
        r'^document_version_driver/(?P<pk>\d+)/attributes/$',
        DocumentVersionDriverEntryFileMetadataListView.as_view(),
        name='document_version_driver_file_metadata_list'
    ),
]

View File

@@ -0,0 +1,48 @@
from __future__ import unicode_literals
from .events import event_file_metadata_document_version_submit
from .tasks import task_process_document_version
def method_document_submit(self):
    """
    Submit the latest version of the document for file metadata
    processing. Documents without a version are ignored.
    """
    version = self.latest_version

    # Don't error out if document has no version
    if not version:
        return

    version.submit_for_file_metadata_processing()
def method_document_version_submit(self):
    """
    Commit the "document version submit" event for this version and then
    schedule the task that processes it, identified by its primary key.
    """
    event_file_metadata_document_version_submit.commit(
        target=self, action_object=self.document
    )

    task_kwargs = {'document_version_id': self.pk}
    task_process_document_version.apply_async(kwargs=task_kwargs)
def method_get_document_file_metadata(self, dotted_name):
    """
    Return the file metadata value named by `dotted_name` from the latest
    version of the document, or None when the document has no version.
    """
    version = self.latest_version

    # Don't error out if document has no version
    if not version:
        return None

    return version.get_file_metadata(dotted_name=dotted_name)
def method_get_document_version_file_metadata(self, dotted_name):
    """
    Return the value of a file metadata entry of this document version.

    `dotted_name` has the form `<driver internal name>.<key>`. Only the
    first dot separates the two parts, so keys that contain dots
    themselves can be looked up.

    Returns None when `dotted_name` has no separator, when the version has
    no entry for the driver, or when the driver has no entry for the key.
    """
    driver_internal_name, separator, key = dotted_name.partition('.')

    # A name without a separator cannot identify a driver and key pair.
    # Treat it like any other lookup miss instead of raising ValueError.
    if not separator:
        return None

    try:
        document_driver = self.file_metadata_drivers.get(
            driver__internal_name=driver_internal_name
        )
    except self.file_metadata_drivers.model.DoesNotExist:
        return None

    try:
        return document_driver.entries.get(key=key).value
    except document_driver.entries.model.DoesNotExist:
        return None

View File

@@ -0,0 +1,125 @@
from __future__ import absolute_import, unicode_literals
from django.shortcuts import get_object_or_404
from django.urls import reverse_lazy
from django.utils.translation import ugettext_lazy as _
from django.utils.translation import ungettext
from mayan.apps.acls.models import AccessControlList
from mayan.apps.common.views import (
MultipleObjectConfirmActionView, SingleObjectEditView,
SingleObjectListView
)
from mayan.apps.documents.models import Document, DocumentType
from .icons import icon_file_metadata
from .models import DocumentVersionDriverEntry
from .permissions import (
permission_document_type_file_metadata_setup,
permission_file_metadata_submit, permission_file_metadata_view
)
class DocumentDriverListView(SingleObjectListView):
    """
    List the file metadata drivers of the latest version of the document
    identified by the `pk` URL argument.
    """

    def get_extra_context(self):
        # Resolve the document once: every get_object() call repeats the
        # database lookup and the access check.
        document = self.get_object()

        return {
            'hide_object': True,
            'no_results_icon': icon_file_metadata,
            'no_results_text': _(
                'File metadata are the attributes of the document\'s file. '
                'They can range from camera information used to take a photo '
                'to the author that created a file. File metadata are set '
                'when the document\'s file was first created. File metadata '
                'attributes reside in the file itself. They are not the '
                'same as the document metadata, which are user defined and '
                'reside in the database.'
            ),
            'no_results_title': _('No file metadata available.'),
            'object': document,
            'title': _(
                'File metadata drivers for: %s'
            ) % document,
        }

    def get_object(self):
        """
        Return the document or respond with 404 if it does not exist. The
        file metadata view permission is checked for the requesting user
        against the document.
        """
        document = get_object_or_404(klass=Document, pk=self.kwargs['pk'])
        AccessControlList.objects.check_access(
            permissions=permission_file_metadata_view,
            user=self.request.user, obj=document
        )
        return document

    def get_object_list(self):
        latest_version = self.get_object().latest_version

        # Don't error out if document has no version. Return an empty
        # queryset so the "no results" template is shown instead.
        # NOTE(review): assumes `file_metadata_drivers` yields
        # DocumentVersionDriverEntry rows - confirm against the models.
        if not latest_version:
            return DocumentVersionDriverEntry.objects.none()

        return latest_version.file_metadata_drivers.all()
class DocumentVersionDriverEntryFileMetadataListView(SingleObjectListView):
    """
    List the file metadata entries of the document version driver entry
    identified by the `pk` URL argument.
    """

    def get_extra_context(self):
        # Resolve the driver entry once: every get_object() call repeats
        # the database lookup and the access check.
        document_version_driver_entry = self.get_object()
        document = document_version_driver_entry.document_version.document

        return {
            'hide_object': True,
            'no_results_title': _('No file metadata available.'),
            'object': document,
            'title': _(
                'File metadata attributes for: %(document)s, '
                'for driver: %(driver)s'
            ) % {
                'document': document,
                'driver': document_version_driver_entry.driver
            },
        }

    def get_object(self):
        """
        Return the driver entry or respond with 404 if it does not exist.
        The file metadata view permission is checked for the requesting
        user against the document version of the entry.
        """
        document_version_driver_entry = get_object_or_404(
            klass=DocumentVersionDriverEntry, pk=self.kwargs['pk']
        )
        AccessControlList.objects.check_access(
            obj=document_version_driver_entry.document_version,
            permissions=permission_file_metadata_view,
            user=self.request.user,
        )
        return document_version_driver_entry

    def get_object_list(self):
        return self.get_object().entries.all()
class DocumentSubmitView(MultipleObjectConfirmActionView):
    """
    Confirmation view that submits one or several documents for file
    metadata processing.
    """
    model = Document
    object_permission = permission_file_metadata_submit
    success_message = (
        '%(count)d document submitted to the file metadata queue.'
    )
    success_message_plural = (
        '%(count)d documents submitted to the file metadata queue.'
    )

    def get_extra_context(self):
        # The amount of selected documents picks the singular or plural title
        document_count = self.get_queryset().count()

        return {
            'title': ungettext(
                'Submit the selected document to the file metadata queue?',
                'Submit the selected documents to the file metadata queue?',
                document_count
            )
        }

    def object_action(self, form, instance):
        instance.submit_for_file_metadata_processing()
class DocumentTypeSettingsEditView(SingleObjectEditView):
    """
    Edit the file metadata settings of the document type identified by the
    `pk` URL argument.
    """
    fields = ('auto_process',)
    object_permission = permission_document_type_file_metadata_setup
    post_action_redirect = reverse_lazy('documents:document_type_list')

    def get_document_type(self):
        """Return the document type or respond with 404 if missing."""
        return get_object_or_404(klass=DocumentType, pk=self.kwargs['pk'])

    def get_extra_context(self):
        # Resolve the document type once instead of querying it again for
        # each context entry.
        document_type = self.get_document_type()

        return {
            'object': document_type,
            'title': _(
                'Edit file metadata settings for document type: %s'
            ) % document_type
        }

    def get_object(self, queryset=None):
        # The edited object is the settings instance of the document type,
        # not the document type itself.
        return self.get_document_type().file_metadata_settings

View File

@@ -105,6 +105,7 @@ INSTALLED_APPS = (
'mayan.apps.document_signatures',
'mayan.apps.document_states',
'mayan.apps.documents',
'mayan.apps.file_metadata',
'mayan.apps.linking',
'mayan.apps.mailer',
'mayan.apps.mayan_statistics',

View File

@@ -2,20 +2,43 @@ from __future__ import absolute_import, unicode_literals
from .. import * # NOQA
# Celery: execute tasks eagerly and propagate their exceptions (see the
# Celery `task_always_eager` and `task_eager_propagates` settings).
CELERY_TASK_ALWAYS_EAGER = True
CELERY_TASK_EAGER_PROPAGATES = True
COMMON_PRODUCTION_ERROR_LOG_PATH = '/tmp/mayan-errors.log'
# NOTE(review): presumably these switch off automatic parsing and automatic
# file metadata processing while testing - confirm against the apps.
DOCUMENT_PARSING_AUTO_PARSING = False
FILE_METADATA_AUTO_PROCESS = False
# Add the `test_without_migrations` app, then rebuild the app list without
# `whitenoise.runserver_nostatic`. INSTALLED_APPS is a list afterwards.
INSTALLED_APPS += ('test_without_migrations',)
INSTALLED_APPS = [
cls for cls in INSTALLED_APPS if cls != 'whitenoise.runserver_nostatic'
]
# NOTE(review): repeats the identical assignment made a few lines above;
# looks like a leftover - confirm before removing.
COMMON_PRODUCTION_ERROR_LOG_PATH = '/tmp/mayan-errors.log'
# Remove whitenoise from the middlewares. It causes out of memory errors
# during the test suite
MIDDLEWARE = [
cls for cls in MIDDLEWARE if cls != 'whitenoise.middleware.WhiteNoiseMiddleware'
]
# Remove middlewares not used for tests
MIDDLEWARE = [
cls for cls in MIDDLEWARE if cls not in [
'common.middleware.error_logging.ErrorLoggingMiddleware',
'django.middleware.security.SecurityMiddleware',
'corsheaders.middleware.CorsMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
'django.middleware.locale.LocaleMiddleware',
'common.middleware.timezone.TimezoneMiddleware',
'common.middleware.ajax_redirect.AjaxRedirect',
]
]
# NOTE(review): presumably disables automatic OCR while testing - confirm.
OCR_AUTO_OCR = False
# Use a simpler password hasher
PASSWORD_HASHERS = (
'django.contrib.auth.hashers.MD5PasswordHasher',
@@ -32,20 +55,3 @@ TEMPLATES[0]['OPTIONS']['loaders'] = (
)
),
)
# NOTE(review): these statements repeat the Celery settings and the
# middleware filter that appear earlier in this settings module; they look
# like leftovers of a reordering - confirm before removing.
CELERY_TASK_ALWAYS_EAGER = True
CELERY_TASK_EAGER_PROPAGATES = True
# Remove middlewares not used for tests
MIDDLEWARE = [
cls for cls in MIDDLEWARE if cls not in [
'common.middleware.error_logging.ErrorLoggingMiddleware',
'django.middleware.security.SecurityMiddleware',
'corsheaders.middleware.CorsMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
'django.middleware.locale.LocaleMiddleware',
'common.middleware.timezone.TimezoneMiddleware',
'common.middleware.ajax_redirect.AjaxRedirect',
]
]