diff --git a/HISTORY.rst b/HISTORY.rst index 7f064833e2..c256289392 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -140,6 +140,7 @@ Reduces clutter and unpredictable column size. * Remove the full name from the user list. * Add the first name and last name to the user list. +* Add file metadata app. 3.1.11 (2019-04-XX) =================== diff --git a/docs/releases/3.2.rst b/docs/releases/3.2.rst index d4418023ef..20e9f26051 100644 --- a/docs/releases/3.2.rst +++ b/docs/releases/3.2.rst @@ -172,6 +172,7 @@ Other changes Reduces clutter and unpredictable column size. * Remove the full name from the user list. * Add the first name and last name to the user list. +* Add file metadata app. Removals -------- diff --git a/mayan/apps/file_metadata/__init__.py b/mayan/apps/file_metadata/__init__.py new file mode 100644 index 0000000000..269e44dabe --- /dev/null +++ b/mayan/apps/file_metadata/__init__.py @@ -0,0 +1,3 @@ +from __future__ import unicode_literals + +default_app_config = 'mayan.apps.file_metadata.apps.FileMetadataApp' diff --git a/mayan/apps/file_metadata/admin.py b/mayan/apps/file_metadata/admin.py new file mode 100644 index 0000000000..056143ec82 --- /dev/null +++ b/mayan/apps/file_metadata/admin.py @@ -0,0 +1,15 @@ +from __future__ import unicode_literals + +from django.contrib import admin +from django.utils.translation import ugettext_lazy as _ + +from .models import StoredDriver + + +@admin.register(StoredDriver) +class StoredDriverAdmin(admin.ModelAdmin): + list_display = ('internal_name', 'get_label', 'driver_path') + + def get_label(self, instance): + return instance.driver_label + get_label.short_description = _('Label') diff --git a/mayan/apps/file_metadata/apps.py b/mayan/apps/file_metadata/apps.py new file mode 100644 index 0000000000..3bcaa07aa0 --- /dev/null +++ b/mayan/apps/file_metadata/apps.py @@ -0,0 +1,210 @@ +from __future__ import unicode_literals + +from django.apps import apps +from django.db.models.signals import post_save +from 
django.utils.translation import ugettext_lazy as _ + +from kombu import Exchange, Queue + +from mayan.apps.acls.classes import ModelPermission +from mayan.apps.common.apps import MayanAppConfig +from mayan.apps.common.classes import ModelAttribute, ModelField +from mayan.apps.common.menus import ( + menu_facet, menu_multi_item, menu_object, menu_secondary, menu_tools +) +from mayan.apps.document_indexing.handlers import handler_index_document +from mayan.apps.documents.search import document_page_search, document_search +from mayan.apps.documents.signals import post_version_upload +from mayan.apps.events import ModelEventType +from mayan.apps.navigation import SourceColumn +from mayan.celery import app + +from .drivers import * # NOQA +from .events import ( + event_file_metadata_document_version_finish, + event_file_metadata_document_version_submit +) +from .handlers import ( + handler_initialize_new_document_type_settings, + handler_process_document_version +) +from .links import ( + link_document_driver_list, link_document_file_metadata_list, + link_document_submit, link_document_multiple_submit, + link_document_type_file_metadata_settings, link_document_type_submit +) +from .methods import ( + method_document_submit, method_document_version_submit, + method_get_document_file_metadata, + method_get_document_version_file_metadata +) +from .permissions import ( + permission_document_type_file_metadata_setup, + permission_file_metadata_submit, permission_file_metadata_view +) +from .signals import post_document_version_file_metadata_processing + + +class FileMetadataApp(MayanAppConfig): + app_namespace = 'file_metadata' + app_url = 'file_metadata' + has_test = True + name = 'mayan.apps.file_metadata' + verbose_name = _('File metadata') + + def ready(self): + super(FileMetadataApp, self).ready() + + FileMetadataEntry = self.get_model(model_name='FileMetadataEntry') + DocumentVersionDriverEntry = self.get_model( + model_name='DocumentVersionDriverEntry' + ) + Document 
= apps.get_model( + app_label='documents', model_name='Document' + ) + DocumentTypeSettings = self.get_model( + model_name='DocumentTypeSettings' + ) + DocumentType = apps.get_model( + app_label='documents', model_name='DocumentType' + ) + DocumentVersion = apps.get_model( + app_label='documents', model_name='DocumentVersion' + ) + + Document.add_to_class( + name='submit_for_file_metadata_processing', + value=method_document_submit + ) + Document.add_to_class( + name='get_file_metadata', + value=method_get_document_file_metadata + ) + DocumentVersion.add_to_class( + name='get_file_metadata', + value=method_get_document_version_file_metadata + ) + DocumentVersion.add_to_class( + name='submit_for_file_metadata_processing', + value=method_document_version_submit + ) + + ModelAttribute(model=Document, name='get_file_metadata') + + ModelEventType.register( + model=Document, event_types=( + event_file_metadata_document_version_finish, + event_file_metadata_document_version_submit + ) + ) + + ModelField( + label=_('File metadata key'), model=Document, + name='versions__file_metadata_drivers__entries__key', + ) + ModelField( + label=_('File metadata key'), model=Document, + name='versions__file_metadata_drivers__entries__value', + ) + + ModelPermission.register( + model=Document, permissions=( + permission_file_metadata_submit, permission_file_metadata_view, + ) + ) + ModelPermission.register( + model=DocumentType, permissions=( + permission_document_type_file_metadata_setup, + permission_file_metadata_submit + ) + ) + ModelPermission.register_inheritance( + model=DocumentTypeSettings, related='document_type', + ) + ModelPermission.register_inheritance( + model=DocumentVersionDriverEntry, related='document_version', + ) + + SourceColumn(attribute='key', source=FileMetadataEntry) + SourceColumn(attribute='value', source=FileMetadataEntry) + SourceColumn( + attribute='driver', source=DocumentVersionDriverEntry + ) + SourceColumn( + attribute='driver__internal_name', + 
source=DocumentVersionDriverEntry + ) + SourceColumn( + attribute='get_attribute_count', source=DocumentVersionDriverEntry + ) + + app.conf.CELERY_QUEUES.append( + Queue( + 'file_metadata', Exchange('file_metadata'), + routing_key='file_metadata' + ), + ) + + app.conf.CELERY_ROUTES.update( + { + 'mayan.apps.file_metadata.tasks.task_process_document_version': { + 'queue': 'file_metadata' + }, + } + ) + + document_search.add_model_field( + field='versions__file_metadata_drivers__entries__key', + label=_('File metadata key') + ) + document_search.add_model_field( + field='versions__file_metadata_drivers__entries__value', + label=_('File metadata value') + ) + + document_page_search.add_model_field( + field='document_version__file_metadata_drivers__entries__key', + label=_('File metadata key') + ) + document_page_search.add_model_field( + field='document_version__file_metadata_drivers__entries__value', + label=_('File metadata value') + ) + + menu_facet.bind_links( + links=(link_document_driver_list,), sources=(Document,) + ) + menu_object.bind_links( + links=(link_document_type_file_metadata_settings,), + sources=(DocumentType,) + ) + menu_object.bind_links( + links=(link_document_file_metadata_list,), + sources=(DocumentVersionDriverEntry,) + ) + menu_multi_item.bind_links( + links=(link_document_multiple_submit,), sources=(Document,) + ) + menu_secondary.bind_links( + links=(link_document_submit,), sources=( + 'file_metadata:document_driver_list', + 'file_metadata:document_version_driver_file_metadata_list' + ) + ) + menu_tools.bind_links( + links=(link_document_type_submit,), + ) + post_save.connect( + dispatch_uid='file_metadata_handler_initialize_new_document_type_settings', + receiver=handler_initialize_new_document_type_settings, + sender=DocumentType + ) + post_version_upload.connect( + dispatch_uid='file_metadata_handler_process_document_version', + receiver=handler_process_document_version, sender=DocumentVersion + ) + 
class FileMetadataDriver(object):
    """
    Base class and registry of the file metadata drivers.

    Subclasses implement ``_process()`` and call ``register()`` with the
    mimetypes they are able to handle. ``_registry`` is shared by all the
    subclasses and maps a mimetype to the list of driver classes registered
    for it, in registration order.
    """
    _registry = {}

    @classmethod
    def process_document_version(cls, document_version):
        """
        Run the drivers registered for the mimetype of ``document_version``.

        Drivers are tried in registration order. The loop stops after the
        first driver that completes without raising
        ``FileMetadataDriverError``. Nothing happens if no driver is
        registered for the mimetype.
        """
        for driver_class in cls._registry.get(document_version.mimetype, ()):
            try:
                driver = driver_class()

                with transaction.atomic():
                    driver.process(document_version=document_version)
                    event_file_metadata_document_version_finish.commit(
                        action_object=document_version.document,
                        target=document_version
                    )

                    post_document_version_file_metadata_processing.send(
                        sender=document_version.__class__,
                        instance=document_version
                    )
            except FileMetadataDriverError as exception:
                # If the driver raises an error, leave a trace of it and
                # try the next driver in the list.
                logger.warning(
                    'Driver %s failed processing document version: %s; %s',
                    driver_class.__name__, document_version, exception
                )
            else:
                # If the driver was successful there is no need to try
                # others in the list for this mimetype.
                return

    @classmethod
    def register(cls, mimetypes):
        """
        Register this driver class as a handler of each of ``mimetypes``.

        A class is added at most once per mimetype, so a repeated mimetype
        or a repeated call does not make the driver run twice.
        """
        for mimetype in mimetypes:
            driver_classes = cls._registry.setdefault(mimetype, [])
            if cls not in driver_classes:
                driver_classes.append(cls)

    def process(self, document_version):
        """
        Store the metadata returned by ``_process()`` for
        ``document_version``.

        The ``StoredDriver`` row of this driver is fetched or created, any
        previous driver entry of this driver for the document version is
        deleted and a new one is created holding one entry per key and
        value pair returned by ``_process()``.
        """
        logger.info(
            'Starting processing document version: %s', document_version
        )

        StoredDriver = apps.get_model(
            app_label='file_metadata', model_name='StoredDriver'
        )

        driver_path = '.'.join([self.__module__, self.__class__.__name__])

        driver, created = StoredDriver.objects.get_or_create(
            driver_path=driver_path, defaults={
                'internal_name': self.internal_name
            }
        )

        # Remove the results of a previous run before storing the new ones
        driver.driver_entries.filter(
            document_version=document_version
        ).delete()

        document_version_driver_entry = driver.driver_entries.create(
            document_version=document_version
        )

        # A driver returning nothing is treated as having found no metadata
        results = self._process(document_version=document_version) or {}

        for key, value in results.items():
            document_version_driver_entry.entries.create(
                key=key, value=value
            )

    def _process(self, document_version):
        """
        Extract the metadata of ``document_version``. Subclasses must
        override this method and return a dictionary (or a false value
        when there is nothing to store).
        """
        raise NotImplementedError(
            'Your %s class has not defined the required '
            '_process() method.' % self.__class__.__name__
        )
document_version.save_to_file(filepath=temp_filename) + result = self.command_exiftool(temp_filename) + return json.loads(s=result.stdout)[0] + finally: + fs_cleanup(filename=temp_filename) + else: + logger.warning( + 'EXIFTool binary not found, not processing document version: %s', + document_version + ) + + +EXIFToolDriver.register( + mimetypes=( + 'application/msword', + 'application/pdf', + 'application/vnd.oasis.opendocument.text', + 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + 'application/x-bittorrent', + 'application/x-gzip', + 'application/x-rar-compressed', + 'application/x-shockwave-flash', + 'application/zip', + 'application/zip', + 'audio/x-pn-realaudio-plugin', + 'audio/x-wav', + 'image/jpeg', + 'image/png', + 'image/svg+xml', + 'image/tiff', + 'image/x-portable-pixmap', + 'text/html', + 'text/rtf', + 'text/x-sh', + 'video/mp4', + 'video/webm', + 'video/x-flv', + 'video/x-matroska' + ) +) diff --git a/mayan/apps/file_metadata/events.py b/mayan/apps/file_metadata/events.py new file mode 100644 index 0000000000..57daccffe2 --- /dev/null +++ b/mayan/apps/file_metadata/events.py @@ -0,0 +1,18 @@ +from __future__ import absolute_import, unicode_literals + +from django.utils.translation import ugettext_lazy as _ + +from mayan.apps.events import EventTypeNamespace + +namespace = EventTypeNamespace( + label=_('File metadata'), name='file_metadata' +) + +event_file_metadata_document_version_submit = namespace.add_event_type( + label=_('Document version submitted for file metadata processing'), + name='document_version_submit' +) +event_file_metadata_document_version_finish = namespace.add_event_type( + label=_('Document version file metadata processing finished'), + name='document_version_finish' +) diff --git a/mayan/apps/file_metadata/exceptions.py b/mayan/apps/file_metadata/exceptions.py new file mode 100644 index 0000000000..a778a2313d --- /dev/null +++ b/mayan/apps/file_metadata/exceptions.py @@ -0,0 +1,9 @@ +from 
__future__ import unicode_literals + + +class FileMetadataError(Exception): + """Base file metadata driver exception""" + + +class FileMetadataDriverError(FileMetadataError): + """Exception raised when a driver encounters an unexpected error""" diff --git a/mayan/apps/file_metadata/handlers.py b/mayan/apps/file_metadata/handlers.py new file mode 100644 index 0000000000..84d304f975 --- /dev/null +++ b/mayan/apps/file_metadata/handlers.py @@ -0,0 +1,21 @@ +from __future__ import unicode_literals + +from django.apps import apps + +from .settings import setting_auto_process + + +def handler_initialize_new_document_type_settings(sender, instance, **kwargs): + DocumentTypeSettings = apps.get_model( + app_label='file_metadata', model_name='DocumentTypeSettings' + ) + + if kwargs['created']: + DocumentTypeSettings.objects.create( + auto_process=setting_auto_process.value, document_type=instance + ) + + +def handler_process_document_version(sender, instance, **kwargs): + if instance.document.document_type.file_metadata_settings.auto_process: + instance.submit_for_file_metadata_processing() diff --git a/mayan/apps/file_metadata/icons.py b/mayan/apps/file_metadata/icons.py new file mode 100644 index 0000000000..4a52fa32ba --- /dev/null +++ b/mayan/apps/file_metadata/icons.py @@ -0,0 +1,13 @@ +from __future__ import absolute_import, unicode_literals + +from mayan.apps.appearance.classes import Icon + +icon_document_submit = Icon( + driver_name='fontawesome', symbol='chess-board' +) +icon_document_multiple_submit = Icon( + driver_name='fontawesome', symbol='chess-board' +) +icon_file_metadata = Icon( + driver_name='fontawesome', symbol='chess-board' +) diff --git a/mayan/apps/file_metadata/links.py b/mayan/apps/file_metadata/links.py new file mode 100644 index 0000000000..f02e6e2eff --- /dev/null +++ b/mayan/apps/file_metadata/links.py @@ -0,0 +1,48 @@ +from __future__ import unicode_literals + +from django.utils.translation import ugettext_lazy as _ + +from 
mayan.apps.navigation import Link + +from .icons import ( + icon_document_submit, icon_document_multiple_submit, icon_file_metadata +) +from .permissions import ( + permission_document_type_file_metadata_setup, + permission_file_metadata_submit, permission_file_metadata_view +) + +link_document_driver_list = Link( + icon_class=icon_file_metadata, + kwargs={'document_id': 'resolved_object.id'}, + permissions=(permission_file_metadata_view,), text=_('File metadata'), + view='file_metadata:document_driver_list' +) +link_document_file_metadata_list = Link( + icon_class=icon_file_metadata, + kwargs={'document_version_driver_id': 'resolved_object.id'}, + permissions=(permission_file_metadata_view,), text=_('Attributes'), + view='file_metadata:document_version_driver_file_metadata_list' +) +link_document_submit = Link( + icon_class=icon_document_submit, + kwargs={'document_id': 'resolved_object.id'}, + permissions=(permission_file_metadata_submit,), + text=_('Submit for file metadata'), view='file_metadata:document_submit' +) +link_document_multiple_submit = Link( + icon_class=icon_document_multiple_submit, text=_('Submit for file metadata'), + view='file_metadata:document_multiple_submit' +) +link_document_type_file_metadata_settings = Link( + icon_class=icon_file_metadata, + kwargs={'document_type_id': 'resolved_object.id'}, + permissions=(permission_document_type_file_metadata_setup,), + text=_('Setup file metadata'), view='file_metadata:document_type_settings' +) +link_document_type_submit = Link( + icon_class=icon_file_metadata, + permissions=(permission_file_metadata_submit,), + text=_('File metadata processing per type'), + view='file_metadata:document_type_submit' +) diff --git a/mayan/apps/file_metadata/literals.py b/mayan/apps/file_metadata/literals.py new file mode 100644 index 0000000000..3cfea47b34 --- /dev/null +++ b/mayan/apps/file_metadata/literals.py @@ -0,0 +1,3 @@ +from __future__ import unicode_literals + +DEFAULT_EXIF_PATH = '/usr/bin/exiftool' diff 
class DocumentTypeSettingsManager(models.Manager):
    """
    Manager that resolves document type settings from the natural key of
    the document type they belong to.
    """
    def get_by_natural_key(self, document_type_natural_key):
        """
        Return the settings instance of the document type identified by
        ``document_type_natural_key``. Raise this model's ``DoesNotExist``
        when there is no such document type.
        """
        document_type_model = apps.get_model(
            app_label='documents', model_name='DocumentType'
        )

        try:
            matching_document_type = document_type_model.objects.get_by_natural_key(
                document_type_natural_key
            )
        except document_type_model.DoesNotExist:
            # Report the failure using the exception of the settings model
            raise self.model.DoesNotExist
        else:
            return self.get(document_type__pk=matching_document_type.pk)
def method_get_document_version_file_metadata(self, dotted_name):
    """
    Return the value of one file metadata entry of this document version.

    ``dotted_name`` has the form ``<driver internal name>.<key>``. Only the
    first dot separates the driver from the key, so keys that contain dots
    are looked up verbatim. Returns ``None`` when the driver has no entry
    for this document version, when the key does not exist or when
    ``dotted_name`` cannot be resolved.
    """
    # Split on the first dot only; a plain split('.') fails to unpack when
    # the key itself contains dots or when there is no dot at all.
    driver_internal_name, separator, key = dotted_name.partition('.')

    try:
        document_driver = self.file_metadata_drivers.get(
            driver__internal_name=driver_internal_name
        )
    except self.file_metadata_drivers.model.DoesNotExist:
        return

    try:
        return document_driver.entries.get(key=key).value
    except document_driver.entries.model.DoesNotExist:
        return
models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('document_version', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='file_metadata_drivers', to='documents.DocumentVersion', verbose_name='Document version')), + ], + options={ + 'ordering': ('document_version', 'driver'), + 'verbose_name': 'Document version driver entry', + 'verbose_name_plural': 'Document version driver entries', + }, + ), + migrations.CreateModel( + name='FileMetadataEntry', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('key', models.CharField(db_index=True, help_text='Name of the file metadata entry.', max_length=255, verbose_name='Key')), + ('value', models.CharField(db_index=True, help_text='Value of the file metadata entry.', max_length=255, verbose_name='Value')), + ('document_version_driver_entry', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='entries', to='file_metadata.DocumentVersionDriverEntry', verbose_name='Document version driver entry')), + ], + options={ + 'ordering': ('key', 'value'), + 'verbose_name': 'File metadata entry', + 'verbose_name_plural': 'File metadata entries', + }, + ), + migrations.CreateModel( + name='StoredDriver', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('driver_path', models.CharField(max_length=255, verbose_name='Driver path')), + ('internal_name', models.CharField(db_index=True, max_length=128, verbose_name='Internal name')), + ], + options={ + 'ordering': ('internal_name',), + 'verbose_name': 'Driver', + 'verbose_name_plural': 'Drivers', + }, + ), + migrations.AddField( + model_name='documentversiondriverentry', + name='driver', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='driver_entries', to='file_metadata.StoredDriver', verbose_name='Driver'), + ), + 
migrations.AlterUniqueTogether( + name='documentversiondriverentry', + unique_together=set([('driver', 'document_version')]), + ), + ] diff --git a/mayan/apps/file_metadata/migrations/__init__.py b/mayan/apps/file_metadata/migrations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mayan/apps/file_metadata/models.py b/mayan/apps/file_metadata/models.py new file mode 100644 index 0000000000..5d07b194df --- /dev/null +++ b/mayan/apps/file_metadata/models.py @@ -0,0 +1,119 @@ +from __future__ import unicode_literals + +import logging + +from django.db import models +from django.utils.encoding import python_2_unicode_compatible +from django.utils.functional import cached_property +from django.utils.module_loading import import_string +from django.utils.translation import force_text +from django.utils.translation import ugettext_lazy as _ + +from mayan.apps.documents.models import DocumentType, DocumentVersion + +from .managers import DocumentTypeSettingsManager + +logger = logging.getLogger(__name__) + + +@python_2_unicode_compatible +class StoredDriver(models.Model): + driver_path = models.CharField( + max_length=255, verbose_name=_('Driver path') + ) + internal_name = models.CharField( + db_index=True, max_length=128, verbose_name=_('Internal name') + ) + + class Meta: + ordering = ('internal_name',) + verbose_name = _('Driver') + verbose_name_plural = _('Drivers') + + def __str__(self): + return force_text(self.driver_label) + + @cached_property + def driver_class(self): + return import_string(self.driver_path) + + @cached_property + def driver_label(self): + return self.driver_class.label + + +@python_2_unicode_compatible +class DocumentVersionDriverEntry(models.Model): + driver = models.ForeignKey( + related_name='driver_entries', to=StoredDriver, + verbose_name=_('Driver') + ) + document_version = models.ForeignKey( + related_name='file_metadata_drivers', to=DocumentVersion, + verbose_name=_('Document version') + ) + + class Meta: + 
@python_2_unicode_compatible
class FileMetadataEntry(models.Model):
    """
    Model to store a single key and value pair of file metadata belonging
    to a document version driver entry.
    """
    # The deletion rule is explicit to match the initial migration of the
    # app, which declares this relation with CASCADE.
    document_version_driver_entry = models.ForeignKey(
        on_delete=models.CASCADE, related_name='entries',
        to=DocumentVersionDriverEntry,
        verbose_name=_('Document version driver entry')
    )

    key = models.CharField(
        db_index=True, help_text=_('Name of the file metadata entry.'),
        max_length=255, verbose_name=_('Key')
    )
    value = models.CharField(
        db_index=True, help_text=_('Value of the file metadata entry.'),
        max_length=255, verbose_name=_('Value')
    )

    class Meta:
        ordering = ('key', 'value')
        verbose_name = _('File metadata entry')
        verbose_name_plural = _('File metadata entries')

    def __str__(self):
        return '{}: {}: {}'.format(
            self.document_version_driver_entry, self.key, self.value
        )
100644 index 0000000000..4251531074 --- /dev/null +++ b/mayan/apps/file_metadata/permissions.py @@ -0,0 +1,20 @@ +from __future__ import absolute_import, unicode_literals + +from django.utils.translation import ugettext_lazy as _ + +from mayan.apps.permissions import PermissionNamespace + +namespace = PermissionNamespace(label=_('File metadata'), name='file_metadata') + +permission_document_type_file_metadata_setup = namespace.add_permission( + label=_('Change document type file metadata settings'), + name='file_metadata_document_type_setup' +) +permission_file_metadata_submit = namespace.add_permission( + label=_( + 'Submit document for file metadata processing' + ), name='file_metadata_submit' +) +permission_file_metadata_view = namespace.add_permission( + label=_('View file metadata'), name='file_metadata_view' +) diff --git a/mayan/apps/file_metadata/queue.py b/mayan/apps/file_metadata/queue.py new file mode 100644 index 0000000000..39e264579d --- /dev/null +++ b/mayan/apps/file_metadata/queue.py @@ -0,0 +1,13 @@ +from __future__ import absolute_import, unicode_literals + +from django.utils.translation import ugettext_lazy as _ + +from mayan.apps.task_manager.classes import CeleryQueue + +queue = CeleryQueue( + label=_('File metadata'), name='file_metadata' +) +queue.add_task_type( + label=_('Process document version'), + name='mayan.apps.file_metadata.tasks.task_process_document_version' +) diff --git a/mayan/apps/file_metadata/settings.py b/mayan/apps/file_metadata/settings.py new file mode 100644 index 0000000000..ceb98a3928 --- /dev/null +++ b/mayan/apps/file_metadata/settings.py @@ -0,0 +1,23 @@ +from __future__ import unicode_literals + +from django.utils.translation import ugettext_lazy as _ + +from mayan.apps.smart_settings import Namespace + +from .literals import DEFAULT_EXIF_PATH + +namespace = Namespace(label=_('File metadata'), name='file_metadata') + +setting_drivers_arguments = namespace.add_setting( + 
global_name='FILE_METADATA_DRIVERS_ARGUMENTS', + default={'exif_driver': {'exiftool_path': DEFAULT_EXIF_PATH}}, help_text=_( + 'Arguments to pass to the drivers.' + ) +) +setting_auto_process = namespace.add_setting( + global_name='FILE_METADATA_AUTO_PROCESS', default=True, + help_text=_( + 'Set new document types to perform file metadata processing ' + 'automatically by default.' + ) +) diff --git a/mayan/apps/file_metadata/signals.py b/mayan/apps/file_metadata/signals.py new file mode 100644 index 0000000000..81fbd2ced9 --- /dev/null +++ b/mayan/apps/file_metadata/signals.py @@ -0,0 +1,7 @@ +from __future__ import unicode_literals + +from django.dispatch import Signal + +post_document_version_file_metadata_processing = Signal( + providing_args=('instance',), use_caching=True +) diff --git a/mayan/apps/file_metadata/tasks.py b/mayan/apps/file_metadata/tasks.py new file mode 100644 index 0000000000..25e7a3167b --- /dev/null +++ b/mayan/apps/file_metadata/tasks.py @@ -0,0 +1,24 @@ +from __future__ import unicode_literals + +import logging + +from django.apps import apps + +from mayan.celery import app + +from .classes import FileMetadataDriver + +logger = logging.getLogger(__name__) + + +@app.task(ignore_result=True) +def task_process_document_version(document_version_id): + DocumentVersion = apps.get_model( + app_label='documents', model_name='DocumentVersion' + ) + + document_version = DocumentVersion.objects.get(pk=document_version_id) + + FileMetadataDriver.process_document_version( + document_version=document_version + ) diff --git a/mayan/apps/file_metadata/tests/__init__.py b/mayan/apps/file_metadata/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mayan/apps/file_metadata/tests/literals.py b/mayan/apps/file_metadata/tests/literals.py new file mode 100644 index 0000000000..295cb5a4b0 --- /dev/null +++ b/mayan/apps/file_metadata/tests/literals.py @@ -0,0 +1,10 @@ +from __future__ import unicode_literals + 
+TEST_DRIVER_INTERNAL_NAME = 'exiftool' +TEST_FILE_METADATA_KEY = 'FileType' +TEST_FILE_METADATA_VALUE = 'PNG' +TEST_FILE_METADATA_INDEX_NODE_TEMPLATE = "{{{{ document.get_file_metadata('{}.{}')}}}}".format( + TEST_DRIVER_INTERNAL_NAME, TEST_FILE_METADATA_KEY +) +TEST_PDF_FILE_METADATA_DOTTED_NAME = 'exiftool.Producer' +TEST_PDF_FILE_METADATA_VALUE = 'pdfTeX-1.40.10' diff --git a/mayan/apps/file_metadata/tests/test_classes.py b/mayan/apps/file_metadata/tests/test_classes.py new file mode 100644 index 0000000000..a754d556e6 --- /dev/null +++ b/mayan/apps/file_metadata/tests/test_classes.py @@ -0,0 +1,21 @@ +from __future__ import unicode_literals + +from mayan.apps.common.tests import BaseTestCase +from mayan.apps.documents.tests import ( + TEST_PDF_DOCUMENT_FILENAME, DocumentTestMixin +) + +from .literals import ( + TEST_PDF_FILE_METADATA_DOTTED_NAME, TEST_PDF_FILE_METADATA_VALUE +) + + +class EXIFToolDriverTestCase(DocumentTestMixin, BaseTestCase): + test_document_filename = TEST_PDF_DOCUMENT_FILENAME + + def test_driver_entries(self): + self.document.submit_for_file_metadata_processing() + value = self.document.get_file_metadata( + dotted_name=TEST_PDF_FILE_METADATA_DOTTED_NAME + ) + self.assertEqual(value, TEST_PDF_FILE_METADATA_VALUE) diff --git a/mayan/apps/file_metadata/tests/test_events.py b/mayan/apps/file_metadata/tests/test_events.py new file mode 100644 index 0000000000..a577ed4103 --- /dev/null +++ b/mayan/apps/file_metadata/tests/test_events.py @@ -0,0 +1,40 @@ +from __future__ import unicode_literals + +from actstream.models import Action + +from mayan.apps.documents.tests.test_models import GenericDocumentTestCase + +from ..events import ( + event_file_metadata_document_version_finish, + event_file_metadata_document_version_submit +) + + +class FileMetadataEventsTestCase(GenericDocumentTestCase): + def test_document_version_finish_event(self): + Action.objects.all().delete() + self.document.latest_version.submit_for_file_metadata_processing() + + # 
Get the most recent action + action = Action.objects.order_by('-timestamp').first() + + self.assertEqual( + action.target, self.document.latest_version + ) + self.assertEqual( + action.verb, event_file_metadata_document_version_finish.id + ) + + def test_document_version_submit_event(self): + Action.objects.all().delete() + self.document.latest_version.submit_for_file_metadata_processing() + + # Get the oldest action + action = Action.objects.order_by('-timestamp').last() + + self.assertEqual( + action.target, self.document.latest_version + ) + self.assertEqual( + action.verb, event_file_metadata_document_version_submit.id + ) diff --git a/mayan/apps/file_metadata/tests/test_indexing.py b/mayan/apps/file_metadata/tests/test_indexing.py new file mode 100644 index 0000000000..be0a0b474a --- /dev/null +++ b/mayan/apps/file_metadata/tests/test_indexing.py @@ -0,0 +1,32 @@ +from __future__ import unicode_literals + +from mayan.apps.common.tests import BaseTestCase +from mayan.apps.document_indexing.models import Index, IndexInstanceNode +from mayan.apps.document_indexing.tests.literals import TEST_INDEX_LABEL +from mayan.apps.documents.tests import DocumentTestMixin + +from .literals import ( + TEST_FILE_METADATA_INDEX_NODE_TEMPLATE, TEST_FILE_METADATA_VALUE +) + + +class IndexingTestCase(DocumentTestMixin, BaseTestCase): + auto_upload_document = False + + def test_indexing(self): + index = Index.objects.create(label=TEST_INDEX_LABEL) + + index.document_types.add(self.document_type) + + root = index.template_root + index.node_templates.create( + parent=root, expression=TEST_FILE_METADATA_INDEX_NODE_TEMPLATE, + link_documents=True + ) + self.document = self.upload_document() + self.document.submit_for_file_metadata_processing() + self.assertTrue( + self.document in IndexInstanceNode.objects.get( + value=TEST_FILE_METADATA_VALUE + ).documents.all() + ) diff --git a/mayan/apps/file_metadata/tests/test_views.py b/mayan/apps/file_metadata/tests/test_views.py new file mode 
100644 index 0000000000..d2dbe87add --- /dev/null +++ b/mayan/apps/file_metadata/tests/test_views.py @@ -0,0 +1,156 @@ +from __future__ import unicode_literals + +from django.test import override_settings + +from mayan.apps.documents.tests import GenericDocumentViewTestCase + +from ..permissions import ( + permission_document_type_file_metadata_setup, + permission_file_metadata_submit, permission_file_metadata_view +) + +from .literals import TEST_FILE_METADATA_KEY + + +@override_settings(FILE_METADATA_AUTO_PROCESS=True) +class FileMetadataViewsTestCase(GenericDocumentViewTestCase): + def _request_document_version_driver_list_view(self): + return self.get( + viewname='file_metadata:document_driver_list', + kwargs={'document_id': self.document.pk} + ) + + def test_document_version_driver_list_view_no_permission(self): + response = self._request_document_version_driver_list_view() + self.assertEqual(response.status_code, 404) + + def test_document_version_driver_list_view_with_access(self): + self.grant_access( + permission=permission_file_metadata_view, obj=self.document + ) + response = self._request_document_version_driver_list_view() + self.assertContains( + response=response, text=self.document.label, status_code=200 + ) + + def _request_document_version_file_metadata_list_view(self): + return self.get( + viewname='file_metadata:document_version_driver_file_metadata_list', + kwargs={'document_version_driver_id': self.document.latest_version.file_metadata_drivers.first().pk} + ) + + def test_document_version_file_metadata_list_view_no_permission(self): + response = self._request_document_version_file_metadata_list_view() + self.assertNotContains( + response=response, text=TEST_FILE_METADATA_KEY, status_code=404 + ) + + def test_document_version_file_metadata_list_view_with_access(self): + self.grant_access( + obj=self.document, permission=permission_file_metadata_view + ) + response = self._request_document_version_file_metadata_list_view() + self.assertContains( 
+ response=response, text=TEST_FILE_METADATA_KEY, status_code=200 + ) + + def _request_document_submit_view(self): + return self.post( + viewname='file_metadata:document_submit', + kwargs={'document_id': self.document.pk} + ) + + def test_document_submit_view_no_permission(self): + self.document.latest_version.file_metadata_drivers.all().delete() + response = self._request_document_submit_view() + self.assertEqual(response.status_code, 404) + self.assertEqual( + self.document.latest_version.file_metadata_drivers.count(), 0 + ) + + def test_document_submit_view_with_access(self): + self.document.latest_version.file_metadata_drivers.all().delete() + self.grant_access( + permission=permission_file_metadata_submit, obj=self.document + ) + response = self._request_document_submit_view() + self.assertEqual(response.status_code, 302) + self.assertEqual( + self.document.latest_version.file_metadata_drivers.count(), 1 + ) + + def _request_multiple_document_submit_view(self): + return self.post( + viewname='file_metadata:document_multiple_submit', + data={ + 'id_list': self.document.pk, + } + ) + + def test_multiple_document_submit_view_no_permission(self): + self.document.latest_version.file_metadata_drivers.all().delete() + response = self._request_multiple_document_submit_view() + self.assertEqual(response.status_code, 404) + self.assertEqual( + self.document.latest_version.file_metadata_drivers.count(), 0 + ) + + def test_multiple_document_submit_view_with_access(self): + self.document.latest_version.file_metadata_drivers.all().delete() + self.grant_access( + permission=permission_file_metadata_submit, obj=self.document + ) + response = self._request_multiple_document_submit_view() + self.assertEqual(response.status_code, 302) + self.assertEqual( + self.document.latest_version.file_metadata_drivers.count(), 1 + ) + + +class DocumentTypeViewsTestCase(GenericDocumentViewTestCase): + def setUp(self): + super(DocumentTypeViewsTestCase, self).setUp() + self.login_user() + + 
def _request_document_type_settings_view(self): + return self.get( + viewname='file_metadata:document_type_settings', + kwargs={'document_type_id': self.document.document_type.pk} + ) + + def test_document_type_settings_view_no_permission(self): + response = self._request_document_type_settings_view() + self.assertEqual(response.status_code, 404) + + def test_document_type_settings_view_with_access(self): + self.grant_access( + permission=permission_document_type_file_metadata_setup, + obj=self.document_type + ) + response = self._request_document_type_settings_view() + + self.assertEqual(response.status_code, 200) + + def _request_document_type_submit_view(self): + return self.post( + viewname='file_metadata:document_type_submit', data={ + 'document_type': self.document_type.pk, + } + ) + + def test_document_type_submit_view_no_permission(self): + response = self._request_document_type_submit_view() + self.assertEqual(response.status_code, 200) + self.assertEqual( + self.document.latest_version.file_metadata_drivers.count(), 0 + ) + + def test_document_type_submit_view_with_access(self): + self.grant_access( + obj=self.document_type, permission=permission_file_metadata_submit, + ) + response = self._request_document_type_submit_view() + self.assertEqual(response.status_code, 302) + self.assertEqual( + self.document.latest_version.file_metadata_drivers.count(), 1 + ) diff --git a/mayan/apps/file_metadata/urls.py b/mayan/apps/file_metadata/urls.py new file mode 100644 index 0000000000..5236811808 --- /dev/null +++ b/mayan/apps/file_metadata/urls.py @@ -0,0 +1,38 @@ +from __future__ import unicode_literals + +from django.conf.urls import url + +from .views import ( + DocumentDriverListView, DocumentSubmitView, DocumentTypeSettingsEditView, + DocumentTypeSubmitView, DocumentVersionDriverEntryFileMetadataListView +) + +urlpatterns = [ + url( + regex=r'^documents/(?P<document_id>\d+)/drivers/$', + name='document_driver_list', view=DocumentDriverListView.as_view() + + ), + url( +
regex=r'^documents/(?P<document_id>\d+)/submit/$', + name='document_submit', view=DocumentSubmitView.as_view() + ), + url( + regex=r'^documents/multiple/submit/$', name='document_multiple_submit', + view=DocumentSubmitView.as_view() + ), + url( + regex=r'^document_types/(?P<document_type_id>\d+)/ocr/settings/$', + name='document_type_settings', + view=DocumentTypeSettingsEditView.as_view() + ), + url( + regex=r'^document_types/submit/$', name='document_type_submit', + view=DocumentTypeSubmitView.as_view() + ), + url( + regex=r'^document_version_driver/(?P<document_version_driver_id>\d+)/attributes/$', + name='document_version_driver_file_metadata_list', + view=DocumentVersionDriverEntryFileMetadataListView.as_view() + ), +] diff --git a/mayan/apps/file_metadata/views.py b/mayan/apps/file_metadata/views.py new file mode 100644 index 0000000000..5a5c9bd621 --- /dev/null +++ b/mayan/apps/file_metadata/views.py @@ -0,0 +1,156 @@ +from __future__ import absolute_import, unicode_literals + +from django.contrib import messages +from django.http import HttpResponseRedirect +from django.shortcuts import get_object_or_404 +from django.urls import reverse_lazy +from django.utils.translation import ugettext_lazy as _ +from django.utils.translation import ungettext + +from mayan.apps.acls.models import AccessControlList +from mayan.apps.common.generics import ( + FormView, MultipleObjectConfirmActionView, SingleObjectEditView, + SingleObjectListView +) +from mayan.apps.common.mixins import ExternalObjectMixin +from mayan.apps.documents.forms import DocumentTypeFilteredSelectForm +from mayan.apps.documents.models import Document, DocumentType + +from .icons import icon_file_metadata +from .models import DocumentVersionDriverEntry +from .permissions import ( + permission_document_type_file_metadata_setup, + permission_file_metadata_submit, permission_file_metadata_view +) + + +class DocumentDriverListView(ExternalObjectMixin, SingleObjectListView): + external_object_class = Document + external_object_permission =
permission_file_metadata_view + external_object_pk_url_kwarg = 'document_id' + + def get_extra_context(self): + return { + 'hide_object': True, + 'no_results_icon': icon_file_metadata, + 'no_results_text': _( + 'File metadata are the attributes of the document\'s file. ' + 'They can range from camera information used to take a photo ' + 'to the author that created a file. File metadata are set ' + 'when the document\'s file was first created. File metadata ' + 'attributes reside in the file itself. They are not the ' + 'same as the document metadata, which are user defined and ' + 'reside in the database.' + ), + 'no_results_title': _('No file metadata available.'), + 'object': self.external_object, + 'title': _( + 'File metadata drivers for: %s' + ) % self.external_object, + } + + def get_object_list(self): + return self.external_object.latest_version.file_metadata_drivers.all() + + +class DocumentVersionDriverEntryFileMetadataListView(ExternalObjectMixin, SingleObjectListView): + external_object_class = DocumentVersionDriverEntry + external_object_permission = permission_file_metadata_view + external_object_pk_url_kwarg = 'document_version_driver_id' + + def get_extra_context(self): + return { + 'hide_object': True, + 'no_results_title': _('No file metadata available.'), + 'object': self.external_object.document_version.document, + 'title': _( + 'File metadata attributes for: %(document)s, for driver: %(driver)s' + ) % { + 'document': self.external_object.document_version.document, + 'driver': self.external_object.driver + }, + } + + def get_object_list(self): + return self.external_object.entries.all() + + +class DocumentSubmitView(MultipleObjectConfirmActionView): + model = Document + object_permission = permission_file_metadata_submit + pk_url_kwarg = 'document_id' + success_message_singular = '%(count)d document submitted to the file metadata queue.' + success_message_plural = '%(count)d documents submitted to the file metadata queue.'
+ + def get_extra_context(self): + queryset = self.get_queryset() + + result = { + 'title': ungettext( + singular='Submit the selected document to the file metadata queue?', + plural='Submit the selected documents to the file metadata queue?', + number=queryset.count() + ) + } + + if queryset.count() == 1: + result['object'] = queryset.first() + + return result + + def object_action(self, form, instance): + instance.submit_for_file_metadata_processing() + + +class DocumentTypeSettingsEditView(ExternalObjectMixin, SingleObjectEditView): + external_object_class = DocumentType + external_object_permission = permission_document_type_file_metadata_setup + external_object_pk_url_kwarg = 'document_type_id' + fields = ('auto_process',) + post_action_redirect = reverse_lazy(viewname='documents:document_type_list') + + def get_extra_context(self): + return { + 'object': self.external_object, + 'title': _( + 'Edit file metadata settings for document type: %s' + ) % self.external_object + } + + def get_object(self, queryset=None): + return self.external_object.file_metadata_settings + + +class DocumentTypeSubmitView(FormView): + extra_context = { + 'title': _( + 'Submit all documents of a type for file metadata processing.' + ) + } + form_class = DocumentTypeFilteredSelectForm + post_action_redirect = reverse_lazy(viewname='common:tools_list') + + def get_form_extra_kwargs(self): + return { + 'allow_multiple': True, + 'permission': permission_file_metadata_submit, + 'user': self.request.user + } + + def form_valid(self, form): + count = 0 + for document_type in form.cleaned_data['document_type']: + for document in document_type.documents.all(): + document.submit_for_file_metadata_processing() + count += 1 + + messages.success( + self.request, _( + '%(count)d documents added to the file metadata processing ' + 'queue.' 
+ ) % { + 'count': count, + } + ) + + return HttpResponseRedirect(self.get_success_url()) diff --git a/mayan/settings/base.py b/mayan/settings/base.py index e28eac9247..42a5677fd6 100644 --- a/mayan/settings/base.py +++ b/mayan/settings/base.py @@ -112,6 +112,7 @@ INSTALLED_APPS = ( 'mayan.apps.document_signatures', 'mayan.apps.document_states', 'mayan.apps.documents', + 'mayan.apps.file_metadata', 'mayan.apps.linking', 'mayan.apps.mailer', 'mayan.apps.mayan_statistics',