diff --git a/mayan/apps/documents/tests/literals.py b/mayan/apps/documents/tests/literals.py index e883aaffe6..5b4f3edcf6 100644 --- a/mayan/apps/documents/tests/literals.py +++ b/mayan/apps/documents/tests/literals.py @@ -16,9 +16,10 @@ __all__ = ( 'TEST_NON_ASCII_COMPRESSED_DOCUMENT_FILENAME', 'TEST_NON_ASCII_COMPRESSED_DOCUMENT_PATH', 'TEST_NON_ASCII_DOCUMENT_FILENAME', 'TEST_NON_ASCII_DOCUMENT_PATH', - 'TEST_PDF_INDIRECT_ROTATE_LABEL', 'TEST_PDF_INDIRECT_ROTATE_PATH', - 'TEST_SMALL_DOCUMENT_CHECKSUM', 'TEST_SMALL_DOCUMENT_FILENAME', - 'TEST_SMALL_DOCUMENT_PATH', 'TEST_DOCUMENT_VERSION_COMMENT_EDITED', + 'TEST_PDF_DOCUMENT_FILENAME', 'TEST_PDF_INDIRECT_ROTATE_LABEL', + 'TEST_PDF_INDIRECT_ROTATE_PATH', 'TEST_SMALL_DOCUMENT_CHECKSUM', + 'TEST_SMALL_DOCUMENT_FILENAME', 'TEST_SMALL_DOCUMENT_PATH', + 'TEST_DOCUMENT_VERSION_COMMENT_EDITED', ) # Filenames @@ -39,6 +40,7 @@ TEST_MULTI_PAGE_TIFF = 'multi_page.tiff' TEST_NON_ASCII_COMPRESSED_DOCUMENT_FILENAME = 'I18N_title_áéíóúüñÑ.png.zip' TEST_NON_ASCII_DOCUMENT_FILENAME = 'I18N_title_áéíóúüñÑ.png' TEST_OFFICE_DOCUMENT = 'simple_2_page_document.doc' +TEST_PDF_DOCUMENT_FILENAME = 'mayan_11_1.pdf' TEST_PDF_INDIRECT_ROTATE_LABEL = 'indirect_rotate.pdf' TEST_SMALL_DOCUMENT_FILENAME = 'title_page.png' TEST_SMALL_DOCUMENT_CHECKSUM = 'efa10e6cc21f83078aaa94d5cbe51de67b51af706143b\ diff --git a/mayan/apps/file_metadata/__init__.py b/mayan/apps/file_metadata/__init__.py new file mode 100644 index 0000000000..269e44dabe --- /dev/null +++ b/mayan/apps/file_metadata/__init__.py @@ -0,0 +1,3 @@ +from __future__ import unicode_literals + +default_app_config = 'mayan.apps.file_metadata.apps.FileMetadataApp' diff --git a/mayan/apps/file_metadata/admin.py b/mayan/apps/file_metadata/admin.py new file mode 100644 index 0000000000..c563ce4c47 --- /dev/null +++ b/mayan/apps/file_metadata/admin.py @@ -0,0 +1,13 @@ +from __future__ import unicode_literals + +from django.contrib import admin + +from .models import StoredDriver + + 
+@admin.register(StoredDriver) +class StoredDriverAdmin(admin.ModelAdmin): + list_display = ('internal_name', 'label', 'driver_path') + + def label(self, instance): + return instance.driver_label diff --git a/mayan/apps/file_metadata/apps.py b/mayan/apps/file_metadata/apps.py new file mode 100644 index 0000000000..d203c448a0 --- /dev/null +++ b/mayan/apps/file_metadata/apps.py @@ -0,0 +1,187 @@ +from __future__ import unicode_literals + +from django.apps import apps +from django.db.models.signals import post_save +from django.utils.translation import ugettext_lazy as _ + +from kombu import Exchange, Queue + +from mayan.apps.acls import ModelPermission +from mayan.apps.common import ( + MayanAppConfig, menu_facet, menu_multi_item, menu_object +) +from mayan.apps.document_indexing.handlers import handler_index_document +from mayan.apps.documents.search import document_page_search, document_search +from mayan.apps.documents.signals import post_version_upload +from mayan.apps.events import ModelEventType +from mayan.apps.navigation import SourceColumn +from mayan.celery import app + +from .drivers import * # NOQA +from .events import ( + event_file_metadata_document_version_finish, + event_file_metadata_document_version_submit +) +from .handlers import ( + handler_initialize_new_document_type_settings, + handler_process_document_version +) +from .links import ( + link_document_driver_list, link_document_file_metadata_list, + link_document_submit, link_document_submit_multiple, + link_document_type_file_metadata_settings +) +from .permissions import ( + permission_document_type_file_metadata_setup, + permission_file_metadata_submit, permission_file_metadata_view +) +from .signals import post_document_version_file_metadata_processing +from .utils import ( + method_document_submit, method_document_version_submit, + method_get_document_file_metadata, + method_get_document_version_file_metadata +) + + +class FileMetadataApp(MayanAppConfig): + app_namespace = 'file_metadata' 
+ app_url = 'file_metadata' + has_test = True + name = 'mayan.apps.file_metadata' + verbose_name = _('File metadata') + + def ready(self): + super(FileMetadataApp, self).ready() + + FileMetadataEntry = self.get_model(model_name='FileMetadataEntry') + DocumentVersionDriverEntry = self.get_model( + model_name='DocumentVersionDriverEntry' + ) + Document = apps.get_model( + app_label='documents', model_name='Document' + ) + DocumentTypeSettings = self.get_model( + model_name='DocumentTypeSettings' + ) + DocumentType = apps.get_model( + app_label='documents', model_name='DocumentType' + ) + DocumentVersion = apps.get_model( + app_label='documents', model_name='DocumentVersion' + ) + + Document.add_to_class( + name='submit_for_file_metadata_processing', + value=method_document_submit + ) + Document.add_to_class( + name='get_file_metadata', + value=method_get_document_file_metadata + ) + DocumentVersion.add_to_class( + name='submit_for_file_metadata_processing', + value=method_document_version_submit + ) + DocumentVersion.add_to_class( + name='get_file_metadata', + value=method_get_document_version_file_metadata + ) + + ModelEventType.register( + model=Document, event_types=( + event_file_metadata_document_version_finish, + event_file_metadata_document_version_submit + ) + ) + + ModelPermission.register( + model=Document, permissions=( + permission_file_metadata_submit, permission_file_metadata_view, + ) + ) + ModelPermission.register( + model=DocumentType, permissions=( + permission_document_type_file_metadata_setup, + ) + ) + ModelPermission.register_inheritance( + model=DocumentTypeSettings, related='document_type', + ) + + SourceColumn(source=FileMetadataEntry, attribute='key') + SourceColumn(source=FileMetadataEntry, attribute='value') + SourceColumn( + source=DocumentVersionDriverEntry, attribute='driver' + ) + SourceColumn( + source=DocumentVersionDriverEntry, attribute='driver__internal_name' + ) + SourceColumn( + source=DocumentVersionDriverEntry, 
attribute='get_attribute_count' + ) + + app.conf.task_queues.append( + Queue( + 'file_metadata', Exchange('file_metadata'), + routing_key='file_metadata' + ), + ) + + app.conf.task_routes.update( + { + 'mayan.apps.file_metadata.tasks.task_process_document_version': { + 'queue': 'file_metadata' + }, + } + ) + + document_search.add_model_field( + field='versions__file_metadata_drivers__entries__key', + label=_('File metadata key') + ) + document_search.add_model_field( + field='versions__file_metadata_drivers__entries__value', + label=_('File metadata value') + ) + + document_page_search.add_model_field( + field='document_version__file_metadata_drivers__entries__key', + label=_('File metadata key') + ) + document_page_search.add_model_field( + field='document_version__file_metadata_drivers__entries__value', + label=_('File metadata value') + ) + + menu_facet.bind_links( + links=(link_document_driver_list,), sources=(Document,) + ) + menu_object.bind_links( + links=(link_document_submit,), sources=(Document,) + ) + menu_object.bind_links( + links=(link_document_type_file_metadata_settings,), + sources=(DocumentType,) + ) + menu_object.bind_links( + links=(link_document_file_metadata_list,), + sources=(DocumentVersionDriverEntry,) + ) + menu_multi_item.bind_links( + links=(link_document_submit_multiple,), sources=(Document,) + ) + + post_save.connect( + dispatch_uid='file_metadata_handler_initialize_new_document_type_settings', + receiver=handler_initialize_new_document_type_settings, + sender=DocumentType + ) + post_version_upload.connect( + dispatch_uid='file_metadata_handler_process_document_version', + receiver=handler_process_document_version, sender=DocumentVersion + ) + post_document_version_file_metadata_processing.connect( + dispatch_uid='file_metadata_handler_index_document', + receiver=handler_index_document, + sender=DocumentVersion + ) diff --git a/mayan/apps/file_metadata/classes.py b/mayan/apps/file_metadata/classes.py new file mode 100644 index 
0000000000..656ab11966
--- /dev/null
+++ b/mayan/apps/file_metadata/classes.py
@@ -0,0 +1,120 @@
+from __future__ import unicode_literals
+
+import logging
+
+from django.apps import apps
+
+from .events import event_file_metadata_document_version_finish
+from .exceptions import FileMetadataDriverError
+from .signals import post_document_version_file_metadata_processing
+
+logger = logging.getLogger(__name__)
+
+
+class FileMetadataDriver(object):
+    """
+    Base class of the file metadata drivers and registry of the driver
+    classes available for each MIME type.
+
+    Subclasses provide `internal_name` and implement `_process()`, which
+    returns a dictionary of the attributes read from a document version.
+    """
+    # Maps a MIME type to the list of driver classes registered for it.
+    # Defined only here, so every subclass shares the same dictionary.
+    _registry = {}
+
+    @classmethod
+    def register(cls, mimetypes):
+        """
+        Register this driver class for each MIME type in `mimetypes`.
+
+        Registering is idempotent: a MIME type that is repeated does not
+        add the driver twice, so a failing driver is not retried.
+        """
+        for mimetype in mimetypes:
+            driver_classes = cls._registry.setdefault(mimetype, [])
+            if cls not in driver_classes:
+                driver_classes.append(cls)
+
+    @classmethod
+    def process_document_version(cls, document_version):
+        """
+        Run the drivers registered for the MIME type of `document_version`
+        in registration order and stop at the first one that succeeds.
+
+        On success the finish event is committed and the
+        post_document_version_file_metadata_processing signal is sent.
+        Nothing is committed or sent when no driver is registered for the
+        MIME type or when every driver fails.
+        """
+        for driver_class in cls._registry.get(document_version.mimetype, ()):
+            try:
+                driver = driver_class()
+                driver.process(document_version=document_version)
+            except FileMetadataDriverError as exception:
+                # If driver raises error, try next in the list. Log the
+                # failure so that it can be diagnosed.
+                logger.warning(
+                    'Driver %s failed to process document version: %s; %s',
+                    driver_class.__name__, document_version, exception
+                )
+            else:
+                # If driver was successful there is no need to try
+                # others in the list for this mimetype
+
+                event_file_metadata_document_version_finish.commit(
+                    action_object=document_version.document,
+                    target=document_version
+                )
+
+                post_document_version_file_metadata_processing.send(
+                    sender=document_version.__class__,
+                    instance=document_version
+                )
+                return
+
+    def process(self, document_version):
+        """
+        Replace the entries stored by this driver for `document_version`
+        with the key and value pairs returned by `_process()`.
+        """
+        logger.info(
+            'Starting processing document version: %s', document_version
+        )
+
+        StoredDriver = apps.get_model(
+            app_label='file_metadata', model_name='StoredDriver'
+        )
+
+        # Dotted import path of the concrete driver class
+        driver_path = '.'.join([self.__module__, self.__class__.__name__])
+
+        driver, created = StoredDriver.objects.get_or_create(
+            driver_path=driver_path, defaults={
+                'internal_name': self.internal_name
+            }
+        )
+
+        # Discard the results of any previous run of this driver
+        driver.driver_entries.filter(
+            document_version=document_version
+        ).delete()
+
+        document_version_driver_entry = driver.driver_entries.create(
+            document_version=document_version
+        )
+
+        for key, value in self._process(document_version=document_version).items():
+            document_version_driver_entry.entries.create(
+                key=key, value=value
+            )
+
+    def _process(self, document_version):
+        """
+        Return a dictionary of file metadata for `document_version`.
+        Must be implemented by each driver subclass.
+        """
+        raise NotImplementedError(
+            'Your %s class has not defined the required '
+            '_process() method.' % self.__class__.__name__
+        )
diff --git a/mayan/apps/file_metadata/drivers/__init__.py b/mayan/apps/file_metadata/drivers/__init__.py
new file mode 100644
index 0000000000..32957e5011
--- /dev/null
+++ b/mayan/apps/file_metadata/drivers/__init__.py
@@ -0,0 +1 @@
+from .exiftool import *  # NOQA
diff --git a/mayan/apps/file_metadata/drivers/exiftool.py b/mayan/apps/file_metadata/drivers/exiftool.py
new file mode 100644
index 0000000000..6d7de4465d
--- /dev/null
+++ b/mayan/apps/file_metadata/drivers/exiftool.py
@@ -0,0 +1,105 @@
+from __future__ import unicode_literals
+
+import json
+import logging
+import os
+
+import sh
+
+from django.utils.translation import ugettext_lazy as _
+
+from mayan.apps.common.utils import fs_cleanup, mkstemp
+
+from ..classes import FileMetadataDriver
+from ..exceptions import FileMetadataDriverError
+from ..settings import setting_drivers_arguments
+
+logger = logging.getLogger(__name__)
+
+
+class EXIFToolDriver(FileMetadataDriver):
+    """
+    File metadata driver that reads the attributes of a document version
+    from the JSON output of the exiftool command.
+    """
+    label = _('EXIF Tool')
+    internal_name = 'exiftool'
+
+    def __init__(self, *args, **kwargs):
+        """
+        Prepare the exiftool command from the path configured in the
+        drivers arguments setting. The command is None when sh cannot
+        find the executable.
+        """
+        try:
+            self.command_exiftool = sh.Command(
+                setting_drivers_arguments.value['exif_driver']['exiftool_path']
+            )
+        except sh.CommandNotFound:
+            self.command_exiftool = None
+        else:
+            # -j makes exiftool print its results as JSON
+            self.command_exiftool = self.command_exiftool.bake('-j')
+
+    def _process(self, document_version):
+        """
+        Save the document version to a temporary file, run exiftool on
+        it and return the first object of the decoded JSON output.
+
+        Raises FileMetadataDriverError when the exiftool command is not
+        available.
+        """
+        if self.command_exiftool is None:
+            # Calling None would raise a TypeError. Report a driver error
+            # instead, which is the failure the driver registry handles.
+            raise FileMetadataDriverError(
+                'The exiftool command is not available.'
+            )
+
+        new_file_object, temp_filename = mkstemp()
+
+        try:
+            # Only the path is used below; release the handle so it is not
+            # leaked on every call. NOTE(review): mkstemp() presumably
+            # mirrors tempfile.mkstemp() and returns an OS level file
+            # descriptor; a file like object is handled too - confirm.
+            if hasattr(new_file_object, 'close'):
+                new_file_object.close()
+            else:
+                os.close(new_file_object)
+
+            document_version.save_to_file(filepath=temp_filename)
+            result = self.command_exiftool(temp_filename)
+            return json.loads(result.stdout)[0]
+        finally:
+            fs_cleanup(filename=temp_filename)
+
+
+EXIFToolDriver.register(
+    mimetypes=(
+        'application/msword',
+        'application/pdf',
+        'application/vnd.oasis.opendocument.text',
+        'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+
'application/x-bittorrent', + 'application/x-gzip', + 'application/x-rar-compressed', + 'application/x-shockwave-flash', + 'application/zip', + 'application/zip', + 'audio/x-pn-realaudio-plugin', + 'audio/x-wav', + 'image/jpeg', + 'image/png', + 'image/svg+xml', + 'image/tiff', + 'image/x-portable-pixmap', + 'text/html', + 'text/rtf', + 'text/x-sh', + 'video/mp4', + 'video/webm', + 'video/x-flv', + 'video/x-matroska' + ) +) diff --git a/mayan/apps/file_metadata/events.py b/mayan/apps/file_metadata/events.py new file mode 100644 index 0000000000..6e4a4d5fcb --- /dev/null +++ b/mayan/apps/file_metadata/events.py @@ -0,0 +1,18 @@ +from __future__ import absolute_import, unicode_literals + +from django.utils.translation import ugettext_lazy as _ + +from mayan.apps.events import EventTypeNamespace + +namespace = EventTypeNamespace( + name='file_metadata', label=_('File metadata') +) + +event_file_metadata_document_version_submit = namespace.add_event_type( + label=_('Document version submitted for file metadata processing'), + name='document_version_submit' +) +event_file_metadata_document_version_finish = namespace.add_event_type( + label=_('Document version file metadata processing finished'), + name='document_version_finish' +) diff --git a/mayan/apps/file_metadata/exceptions.py b/mayan/apps/file_metadata/exceptions.py new file mode 100644 index 0000000000..1f80c50b0d --- /dev/null +++ b/mayan/apps/file_metadata/exceptions.py @@ -0,0 +1,5 @@ +from __future__ import unicode_literals + + +class FileMetadataDriverError(Exception): + """Base file metadata driver exception""" diff --git a/mayan/apps/file_metadata/handlers.py b/mayan/apps/file_metadata/handlers.py new file mode 100644 index 0000000000..01a95417e2 --- /dev/null +++ b/mayan/apps/file_metadata/handlers.py @@ -0,0 +1,21 @@ +from __future__ import unicode_literals + +from django.apps import apps + +from .settings import setting_auto_process + + +def handler_initialize_new_document_type_settings(sender, 
instance, **kwargs): + DocumentTypeSettings = apps.get_model( + app_label='file_metadata', model_name='DocumentTypeSettings' + ) + + if kwargs['created']: + DocumentTypeSettings.objects.create( + document_type=instance, auto_process=setting_auto_process.value + ) + + +def handler_process_document_version(sender, instance, **kwargs): + if instance.document.document_type.file_metadata_settings.auto_process: + instance.submit_for_file_metadata_processing() diff --git a/mayan/apps/file_metadata/icons.py b/mayan/apps/file_metadata/icons.py new file mode 100644 index 0000000000..0382291179 --- /dev/null +++ b/mayan/apps/file_metadata/icons.py @@ -0,0 +1,7 @@ +from __future__ import absolute_import, unicode_literals + +from mayan.apps.appearance.classes import Icon + +icon_file_metadata = Icon( + driver_name='fontawesome', symbol='chess-board' +) diff --git a/mayan/apps/file_metadata/links.py b/mayan/apps/file_metadata/links.py new file mode 100644 index 0000000000..e8d01b72da --- /dev/null +++ b/mayan/apps/file_metadata/links.py @@ -0,0 +1,37 @@ +from __future__ import unicode_literals + +from django.utils.translation import ugettext_lazy as _ + +from mayan.apps.navigation import Link + +from .icons import icon_file_metadata +from .permissions import ( + permission_document_type_file_metadata_setup, + permission_file_metadata_submit, permission_file_metadata_view +) + +link_document_driver_list = Link( + args='resolved_object.id', icon_class=icon_file_metadata, + permissions=(permission_file_metadata_view,), text=_('File metadata'), + view='file_metadata:document_driver_list', +) +link_document_file_metadata_list = Link( + args=('resolved_object.id',), icon_class=icon_file_metadata, + permissions=(permission_file_metadata_view,), text=_('Attributes'), + view='file_metadata:document_version_driver_file_metadata_list', +) +link_document_submit = Link( + args='resolved_object.id', permissions=(permission_file_metadata_submit,), + text=_('Submit for file metadata'), 
view='file_metadata:document_submit' +) +link_document_submit_multiple = Link( + text=_('Submit for file metadata'), + view='file_metadata:document_submit_multiple' +) +link_document_type_file_metadata_settings = Link( + args='resolved_object.id', + icon_class=icon_file_metadata, + permissions=(permission_document_type_file_metadata_setup,), + text=_('Setup file metadata'), + view='file_metadata:document_type_settings', +) diff --git a/mayan/apps/file_metadata/literals.py b/mayan/apps/file_metadata/literals.py new file mode 100644 index 0000000000..3cfea47b34 --- /dev/null +++ b/mayan/apps/file_metadata/literals.py @@ -0,0 +1,3 @@ +from __future__ import unicode_literals + +DEFAULT_EXIF_PATH = '/usr/bin/exiftool' diff --git a/mayan/apps/file_metadata/managers.py b/mayan/apps/file_metadata/managers.py new file mode 100644 index 0000000000..a3a5498cd4 --- /dev/null +++ b/mayan/apps/file_metadata/managers.py @@ -0,0 +1,23 @@ +from __future__ import unicode_literals + +import logging + +from django.apps import apps +from django.db import models + +logger = logging.getLogger(__name__) + + +class DocumentTypeSettingsManager(models.Manager): + def get_by_natural_key(self, document_type_natural_key): + DocumentType = apps.get_model( + app_label='documents', model_name='DocumentType' + ) + try: + document_type = DocumentType.objects.get_by_natural_key( + document_type_natural_key + ) + except DocumentType.DoesNotExist: + raise self.model.DoesNotExist + + return self.get(document_type__pk=document_type.pk) diff --git a/mayan/apps/file_metadata/migrations/0001_initial.py b/mayan/apps/file_metadata/migrations/0001_initial.py new file mode 100644 index 0000000000..b9ea32e921 --- /dev/null +++ b/mayan/apps/file_metadata/migrations/0001_initial.py @@ -0,0 +1,160 @@ +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion + + +def operation_create_setting_for_existing_document_types(apps, schema_editor): + DocumentType 
= apps.get_model( + app_label='documents', model_name='DocumentType' + ) + DocumentTypeSettings = apps.get_model( + app_label='file_metadata', model_name='DocumentTypeSettings' + ) + + for document_type in DocumentType.objects.using(schema_editor.connection.alias).all(): + DocumentTypeSettings.objects.using( + schema_editor.connection.alias + ).create(document_type=document_type) + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('documents', '0049_auto_20181211_0011'), + ] + + operations = [ + migrations.CreateModel( + name='DocumentTypeSettings', + fields=[ + ( + 'id', models.AutoField( + auto_created=True, primary_key=True, serialize=False, + verbose_name='ID' + ) + ), + ( + 'auto_process', models.BooleanField( + default=True, verbose_name='Automatically queue ' + 'newly created documents for processing.' + ) + ), + ( + 'document_type', models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, + related_name='file_metadata_settings', + to='documents.DocumentType', + verbose_name='Document type' + ) + ), + ], + options={ + 'verbose_name': 'Document type settings', + 'verbose_name_plural': 'Document types settings', + }, + ), + migrations.CreateModel( + name='DocumentVersionDriverEntry', + fields=[ + ( + 'id', models.AutoField( + auto_created=True, primary_key=True, serialize=False, + verbose_name='ID' + ) + ), + ( + 'document_version', models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name='file_metadata_drivers', + to='documents.DocumentVersion', + verbose_name='Document version' + ) + ), + ], + options={ + 'ordering': ('document_version', 'driver'), + 'verbose_name': 'Document version driver entry', + 'verbose_name_plural': 'Document version driver entries', + }, + ), + migrations.CreateModel( + name='FileMetadataEntry', + fields=[ + ( + 'id', models.AutoField( + auto_created=True, primary_key=True, serialize=False, + verbose_name='ID' + ) + ), + ( + 'key', models.CharField( + 
db_index=True, max_length=255, verbose_name='Key' + ) + ), + ( + 'value', models.CharField( + db_index=True, max_length=255, verbose_name='Value' + ) + ), + ( + 'document_version_driver_entry', models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name='entries', + to='file_metadata.DocumentVersionDriverEntry', + verbose_name='Document version driver entry' + ) + ), + ], + options={ + 'ordering': ('key', 'value'), + 'verbose_name': 'File metadata entry', + 'verbose_name_plural': 'File metadata entries', + }, + ), + migrations.CreateModel( + name='StoredDriver', + fields=[ + ( + 'id', models.AutoField( + auto_created=True, primary_key=True, serialize=False, + verbose_name='ID' + ) + ), + ( + 'driver_path', models.CharField( + max_length=255, verbose_name='Driver path' + ) + ), + ( + 'internal_name', models.CharField( + db_index=True, max_length=128, + verbose_name='Internal name' + ) + ), + ], + options={ + 'ordering': ('internal_name',), + 'verbose_name': 'Driver', + 'verbose_name_plural': 'Drivers', + }, + ), + migrations.AddField( + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name='driver_entries', to='file_metadata.StoredDriver', + verbose_name='Driver' + ), + model_name='documentversiondriverentry', + name='driver' + ), + migrations.AlterUniqueTogether( + name='documentversiondriverentry', + unique_together=set([('driver', 'document_version')]), + ), + migrations.RunPython( + code=operation_create_setting_for_existing_document_types + ), + ] diff --git a/mayan/apps/file_metadata/migrations/__init__.py b/mayan/apps/file_metadata/migrations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mayan/apps/file_metadata/models.py b/mayan/apps/file_metadata/models.py new file mode 100644 index 0000000000..77c237d5c1 --- /dev/null +++ b/mayan/apps/file_metadata/models.py @@ -0,0 +1,117 @@ +from __future__ import unicode_literals + +import logging + +from django.db import models +from 
django.utils.encoding import force_text, python_2_unicode_compatible
+from django.utils.functional import cached_property
+from django.utils.module_loading import import_string
+from django.utils.translation import ugettext_lazy as _
+
+from mayan.apps.documents.models import DocumentType, DocumentVersion
+
+from .managers import DocumentTypeSettingsManager
+
+logger = logging.getLogger(__name__)
+
+
+@python_2_unicode_compatible
+class StoredDriver(models.Model):
+    """Database record of a driver class, located by its dotted path."""
+    driver_path = models.CharField(
+        max_length=255, verbose_name=_('Driver path')
+    )
+    internal_name = models.CharField(
+        db_index=True, max_length=128, verbose_name=_('Internal name')
+    )
+
+    class Meta:
+        ordering = ('internal_name',)
+        verbose_name = _('Driver')
+        verbose_name_plural = _('Drivers')
+
+    def __str__(self):
+        return force_text(self.driver_label)
+
+    @cached_property
+    def driver_class(self):
+        return import_string(self.driver_path)
+
+    @cached_property
+    def driver_label(self):
+        return self.driver_class.label
+
+
+@python_2_unicode_compatible
+class DocumentVersionDriverEntry(models.Model):
+    """Links a document version to a driver and groups its entries."""
+    driver = models.ForeignKey(
+        on_delete=models.CASCADE, related_name='driver_entries',
+        to=StoredDriver, verbose_name=_('Driver')
+    )
+    document_version = models.ForeignKey(
+        on_delete=models.CASCADE, related_name='file_metadata_drivers',
+        to=DocumentVersion, verbose_name=_('Document version')
+    )
+
+    class Meta:
+        ordering = ('document_version', 'driver')
+        unique_together = ('driver', 'document_version')
+        verbose_name = _('Document version driver entry')
+        verbose_name_plural = _('Document version driver entries')
+
+    def __str__(self):
+        return force_text(self.driver)
+
+    def get_attribute_count(self):
+        return self.entries.count()
+    get_attribute_count.short_description = _('Attribute count')
+
+
+class DocumentTypeSettings(models.Model):
+    """Model to store the file metadata settings for a document type."""
+    document_type = models.OneToOneField(
+        on_delete=models.CASCADE, related_name='file_metadata_settings',
+        to=DocumentType, unique=True, verbose_name=_('Document type')
+    )
+    auto_process = models.BooleanField(
+        default=True, verbose_name=_(
+            'Automatically queue newly created documents for processing.'
+        )
+    )
+
+    objects = DocumentTypeSettingsManager()
+
+    class Meta:
+        verbose_name = _('Document type settings')
+        verbose_name_plural = _('Document types settings')
+
+    def natural_key(self):
+        return self.document_type.natural_key()
+    natural_key.dependencies = ['documents.DocumentType']
+
+
+@python_2_unicode_compatible
+class FileMetadataEntry(models.Model):
+    """A key and value pair belonging to a document version driver entry."""
+    document_version_driver_entry = models.ForeignKey(
+        on_delete=models.CASCADE, related_name='entries',
+        to=DocumentVersionDriverEntry,
+        verbose_name=_('Document version driver entry')
+    )
+    key = models.CharField(
+        db_index=True, max_length=255, verbose_name=_('Key')
+    )
+    value = models.CharField(
+        db_index=True, max_length=255, verbose_name=_('Value')
+    )
+
+    class Meta:
+        ordering = ('key', 'value')
+        verbose_name = _('File metadata entry')
+        verbose_name_plural = _('File metadata entries')
+
+    def __str__(self):
+        return '{}: {}: {}'.format(
+            self.document_version_driver_entry, self.key, self.value
+        )
diff --git a/mayan/apps/file_metadata/permissions.py b/mayan/apps/file_metadata/permissions.py
new file mode 100644
index 0000000000..9aa0c3aeee
--- /dev/null
+++ b/mayan/apps/file_metadata/permissions.py
@@ -0,0 +1,20 @@
+from __future__ import absolute_import, unicode_literals
+
+from django.utils.translation import ugettext_lazy as _
+
+from mayan.apps.permissions import PermissionNamespace
+
+namespace = PermissionNamespace('file_metadata', _('File metadata'))
+
+permission_document_type_file_metadata_setup = namespace.add_permission(
+    name='file_metadata_document_type_setup',
+    label=_('Change document type file metadata settings')
+)
+permission_file_metadata_submit = namespace.add_permission(
+
name='file_metadata_submit', label=_( + 'Submit document for file metadata processing' + ) +) +permission_file_metadata_view = namespace.add_permission( + name='file_metadata_view', label=_('View file metadata') +) diff --git a/mayan/apps/file_metadata/queue.py b/mayan/apps/file_metadata/queue.py new file mode 100644 index 0000000000..39e264579d --- /dev/null +++ b/mayan/apps/file_metadata/queue.py @@ -0,0 +1,13 @@ +from __future__ import absolute_import, unicode_literals + +from django.utils.translation import ugettext_lazy as _ + +from mayan.apps.task_manager.classes import CeleryQueue + +queue = CeleryQueue( + label=_('File metadata'), name='file_metadata' +) +queue.add_task_type( + label=_('Process document version'), + name='mayan.apps.file_metadata.tasks.task_process_document_version' +) diff --git a/mayan/apps/file_metadata/settings.py b/mayan/apps/file_metadata/settings.py new file mode 100644 index 0000000000..692ab7a12f --- /dev/null +++ b/mayan/apps/file_metadata/settings.py @@ -0,0 +1,23 @@ +from __future__ import unicode_literals + +from django.utils.translation import ugettext_lazy as _ + +from mayan.apps.smart_settings import Namespace + +from .literals import DEFAULT_EXIF_PATH + +namespace = Namespace(name='file_metadata', label=_('File metadata')) + +setting_drivers_arguments = namespace.add_setting( + global_name='FILE_METADATA_DRIVERS_ARGUMENTS', + default={'exif_driver': {'exiftool_path': DEFAULT_EXIF_PATH}}, help_text=_( + 'Arguments to pass to the drivers.' + ) +) +setting_auto_process = namespace.add_setting( + global_name='FILE_METADATA_AUTO_PROCESS', default=True, + help_text=_( + 'Set new document types to perform file metadata processing ' + 'automatically by default.' 
+ ) +) diff --git a/mayan/apps/file_metadata/signals.py b/mayan/apps/file_metadata/signals.py new file mode 100644 index 0000000000..81fbd2ced9 --- /dev/null +++ b/mayan/apps/file_metadata/signals.py @@ -0,0 +1,7 @@ +from __future__ import unicode_literals + +from django.dispatch import Signal + +post_document_version_file_metadata_processing = Signal( + providing_args=('instance',), use_caching=True +) diff --git a/mayan/apps/file_metadata/tasks.py b/mayan/apps/file_metadata/tasks.py new file mode 100644 index 0000000000..25e7a3167b --- /dev/null +++ b/mayan/apps/file_metadata/tasks.py @@ -0,0 +1,24 @@ +from __future__ import unicode_literals + +import logging + +from django.apps import apps + +from mayan.celery import app + +from .classes import FileMetadataDriver + +logger = logging.getLogger(__name__) + + +@app.task(ignore_result=True) +def task_process_document_version(document_version_id): + DocumentVersion = apps.get_model( + app_label='documents', model_name='DocumentVersion' + ) + + document_version = DocumentVersion.objects.get(pk=document_version_id) + + FileMetadataDriver.process_document_version( + document_version=document_version + ) diff --git a/mayan/apps/file_metadata/tests/__init__.py b/mayan/apps/file_metadata/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mayan/apps/file_metadata/tests/literals.py b/mayan/apps/file_metadata/tests/literals.py new file mode 100644 index 0000000000..295cb5a4b0 --- /dev/null +++ b/mayan/apps/file_metadata/tests/literals.py @@ -0,0 +1,10 @@ +from __future__ import unicode_literals + +TEST_DRIVER_INTERNAL_NAME = 'exiftool' +TEST_FILE_METADATA_KEY = 'FileType' +TEST_FILE_METADATA_VALUE = 'PNG' +TEST_FILE_METADATA_INDEX_NODE_TEMPLATE = "{{{{ document.get_file_metadata('{}.{}')}}}}".format( + TEST_DRIVER_INTERNAL_NAME, TEST_FILE_METADATA_KEY +) +TEST_PDF_FILE_METADATA_DOTTED_NAME = 'exiftool.Producer' +TEST_PDF_FILE_METADATA_VALUE = 'pdfTeX-1.40.10' diff --git 
a/mayan/apps/file_metadata/tests/test_classes.py b/mayan/apps/file_metadata/tests/test_classes.py new file mode 100644 index 0000000000..a754d556e6 --- /dev/null +++ b/mayan/apps/file_metadata/tests/test_classes.py @@ -0,0 +1,21 @@ +from __future__ import unicode_literals + +from mayan.apps.common.tests import BaseTestCase +from mayan.apps.documents.tests import ( + TEST_PDF_DOCUMENT_FILENAME, DocumentTestMixin +) + +from .literals import ( + TEST_PDF_FILE_METADATA_DOTTED_NAME, TEST_PDF_FILE_METADATA_VALUE +) + + +class EXIFToolDriverTestCase(DocumentTestMixin, BaseTestCase): + test_document_filename = TEST_PDF_DOCUMENT_FILENAME + + def test_driver_entries(self): + self.document.submit_for_file_metadata_processing() + value = self.document.get_file_metadata( + dotted_name=TEST_PDF_FILE_METADATA_DOTTED_NAME + ) + self.assertEqual(value, TEST_PDF_FILE_METADATA_VALUE) diff --git a/mayan/apps/file_metadata/tests/test_events.py b/mayan/apps/file_metadata/tests/test_events.py new file mode 100644 index 0000000000..a577ed4103 --- /dev/null +++ b/mayan/apps/file_metadata/tests/test_events.py @@ -0,0 +1,40 @@ +from __future__ import unicode_literals + +from actstream.models import Action + +from mayan.apps.documents.tests.test_models import GenericDocumentTestCase + +from ..events import ( + event_file_metadata_document_version_finish, + event_file_metadata_document_version_submit +) + + +class FileMetadataEventsTestCase(GenericDocumentTestCase): + def test_document_version_finish_event(self): + Action.objects.all().delete() + self.document.latest_version.submit_for_file_metadata_processing() + + # Get the most recent action + action = Action.objects.order_by('-timestamp').first() + + self.assertEqual( + action.target, self.document.latest_version + ) + self.assertEqual( + action.verb, event_file_metadata_document_version_finish.id + ) + + def test_document_version_submit_event(self): + Action.objects.all().delete() + 
self.document.latest_version.submit_for_file_metadata_processing() + + # Get the oldest action + action = Action.objects.order_by('-timestamp').last() + + self.assertEqual( + action.target, self.document.latest_version + ) + self.assertEqual( + action.verb, event_file_metadata_document_version_submit.id + ) diff --git a/mayan/apps/file_metadata/tests/test_indexing.py b/mayan/apps/file_metadata/tests/test_indexing.py new file mode 100644 index 0000000000..be0a0b474a --- /dev/null +++ b/mayan/apps/file_metadata/tests/test_indexing.py @@ -0,0 +1,32 @@ +from __future__ import unicode_literals + +from mayan.apps.common.tests import BaseTestCase +from mayan.apps.document_indexing.models import Index, IndexInstanceNode +from mayan.apps.document_indexing.tests.literals import TEST_INDEX_LABEL +from mayan.apps.documents.tests import DocumentTestMixin + +from .literals import ( + TEST_FILE_METADATA_INDEX_NODE_TEMPLATE, TEST_FILE_METADATA_VALUE +) + + +class IndexingTestCase(DocumentTestMixin, BaseTestCase): + auto_upload_document = False + + def test_indexing(self): + index = Index.objects.create(label=TEST_INDEX_LABEL) + + index.document_types.add(self.document_type) + + root = index.template_root + index.node_templates.create( + parent=root, expression=TEST_FILE_METADATA_INDEX_NODE_TEMPLATE, + link_documents=True + ) + self.document = self.upload_document() + self.document.submit_for_file_metadata_processing() + self.assertTrue( + self.document in IndexInstanceNode.objects.get( + value=TEST_FILE_METADATA_VALUE + ).documents.all() + ) diff --git a/mayan/apps/file_metadata/tests/test_views.py b/mayan/apps/file_metadata/tests/test_views.py new file mode 100644 index 0000000000..0fcebf3f65 --- /dev/null +++ b/mayan/apps/file_metadata/tests/test_views.py @@ -0,0 +1,137 @@ +from __future__ import unicode_literals + +from django.test import override_settings + +from mayan.apps.documents.tests import GenericDocumentViewTestCase + +from ..permissions import ( + 
permission_document_type_file_metadata_setup, + permission_file_metadata_submit, permission_file_metadata_view +) + +from .literals import TEST_FILE_METADATA_KEY + + +@override_settings(FILE_METADATA_AUTO_PROCESS=True) +class FileMetadataViewsTestCase(GenericDocumentViewTestCase): + def setUp(self): + super(FileMetadataViewsTestCase, self).setUp() + self.login_user() + + def _request_document_version_driver_list_view(self): + return self.get( + args=(self.document.pk,), + viewname='file_metadata:document_driver_list', + ) + + def test_document_version_driver_list_view_no_permission(self): + response = self._request_document_version_driver_list_view() + self.assertEqual(response.status_code, 403) + + def test_document_version_driver_list_view_with_access(self): + self.grant_access( + permission=permission_file_metadata_view, obj=self.document + ) + response = self._request_document_version_driver_list_view() + self.assertContains( + response=response, text=self.document.label, status_code=200 + ) + + def _request_document_version_file_metadata_list_view(self): + return self.get( + args=( + self.document.latest_version.file_metadata_drivers.first().pk, + ), + viewname='file_metadata:document_version_driver_file_metadata_list', + ) + + def test_document_version_file_metadata_list_view_no_permission(self): + response = self._request_document_version_file_metadata_list_view() + self.assertNotContains( + response=response, text=TEST_FILE_METADATA_KEY, status_code=403 + ) + + def test_document_version_file_metadata_list_view_with_access(self): + self.grant_access( + permission=permission_file_metadata_view, obj=self.document + ) + response = self._request_document_version_file_metadata_list_view() + self.assertContains( + response=response, text=TEST_FILE_METADATA_KEY, status_code=200 + ) + + def _request_document_submit_view(self): + return self.post( + viewname='file_metadata:document_submit', args=(self.document.pk,) + ) + + def 
test_document_submit_view_no_permission(self): + self.document.latest_version.file_metadata_drivers.all().delete() + response = self._request_document_submit_view() + self.assertEqual(response.status_code, 302) + self.assertEqual( + self.document.latest_version.file_metadata_drivers.count(), 0 + ) + + def test_document_submit_view_with_access(self): + self.document.latest_version.file_metadata_drivers.all().delete() + self.grant_access( + permission=permission_file_metadata_submit, obj=self.document + ) + response = self._request_document_submit_view() + self.assertEqual(response.status_code, 302) + self.assertEqual( + self.document.latest_version.file_metadata_drivers.count(), 1 + ) + + def _request_multiple_document_submit_view(self): + return self.post( + viewname='file_metadata:document_submit_multiple', + data={ + 'id_list': self.document.pk, + } + ) + + def test_multiple_document_submit_view_no_permission(self): + self.document.latest_version.file_metadata_drivers.all().delete() + response = self._request_multiple_document_submit_view() + self.assertEqual(response.status_code, 302) + self.assertEqual( + self.document.latest_version.file_metadata_drivers.count(), 0 + ) + + def test_multiple_document_submit_view_with_access(self): + self.document.latest_version.file_metadata_drivers.all().delete() + self.grant_access( + permission=permission_file_metadata_submit, obj=self.document + ) + response = self._request_multiple_document_submit_view() + self.assertEqual(response.status_code, 302) + self.assertEqual( + self.document.latest_version.file_metadata_drivers.count(), 1 + ) + + +class DocumentTypeViewsTestCase(GenericDocumentViewTestCase): + def setUp(self): + super(DocumentTypeViewsTestCase, self).setUp() + self.login_user() + + def _request_document_type_settings_view(self): + return self.get( + viewname='file_metadata:document_type_settings', + args=(self.document.document_type.pk,) + ) + + def test_document_type_settings_view_no_permission(self): + response 
= self._request_document_type_settings_view() + self.assertEqual(response.status_code, 403) + + def test_document_type_settings_view_with_access(self): + self.grant_access( + permission=permission_document_type_file_metadata_setup, + obj=self.document.document_type + ) + response = self._request_document_type_settings_view() + + self.assertEqual(response.status_code, 200) diff --git a/mayan/apps/file_metadata/urls.py b/mayan/apps/file_metadata/urls.py new file mode 100644 index 0000000000..31de428f00 --- /dev/null +++ b/mayan/apps/file_metadata/urls.py @@ -0,0 +1,33 @@ +from __future__ import unicode_literals + +from django.conf.urls import url + +from .views import ( + DocumentDriverListView, DocumentSubmitView, DocumentTypeSettingsEditView, + DocumentVersionDriverEntryFileMetadataListView +) + +urlpatterns = [ + url( + r'^documents/(?P<pk>\d+)/drivers/$', DocumentDriverListView.as_view(), + name='document_driver_list' + ), + url( + r'^documents/(?P<pk>\d+)/submit/$', DocumentSubmitView.as_view(), + name='document_submit' + ), + url( + r'^documents/multiple/submit/$', DocumentSubmitView.as_view(), + name='document_submit_multiple' + ), + url( + r'^document_types/(?P<pk>\d+)/ocr/settings/$', + DocumentTypeSettingsEditView.as_view(), + name='document_type_settings' + ), + url( + r'^document_version_driver/(?P<pk>\d+)/attributes/$', + DocumentVersionDriverEntryFileMetadataListView.as_view(), + name='document_version_driver_file_metadata_list' + ), +] diff --git a/mayan/apps/file_metadata/utils.py b/mayan/apps/file_metadata/utils.py new file mode 100644 index 0000000000..a47d126ced --- /dev/null +++ b/mayan/apps/file_metadata/utils.py @@ -0,0 +1,48 @@ +from __future__ import unicode_literals + +from .events import event_file_metadata_document_version_submit +from .tasks import task_process_document_version + + +def method_document_submit(self): + latest_version = self.latest_version + # Don't error out if document has no version + if latest_version: +
latest_version.submit_for_file_metadata_processing() + + +def method_document_version_submit(self): + event_file_metadata_document_version_submit.commit( + action_object=self.document, target=self + ) + + task_process_document_version.apply_async( + kwargs={ + 'document_version_id': self.pk, + } + ) + + +def method_get_document_file_metadata(self, dotted_name): + latest_version = self.latest_version + # Don't error out if document has no version + if latest_version: + return latest_version.get_file_metadata( + dotted_name=dotted_name + ) + + +def method_get_document_version_file_metadata(self, dotted_name): + driver_internal_name, key = dotted_name.split('.', 1) + + try: + document_driver = self.file_metadata_drivers.get( + driver__internal_name=driver_internal_name + ) + except self.file_metadata_drivers.model.DoesNotExist: + return + else: + try: + return document_driver.entries.get(key=key).value + except document_driver.entries.model.DoesNotExist: + return diff --git a/mayan/apps/file_metadata/views.py b/mayan/apps/file_metadata/views.py new file mode 100644 index 0000000000..8365f48223 --- /dev/null +++ b/mayan/apps/file_metadata/views.py @@ -0,0 +1,125 @@ +from __future__ import absolute_import, unicode_literals + +from django.shortcuts import get_object_or_404 +from django.urls import reverse_lazy +from django.utils.translation import ugettext_lazy as _ +from django.utils.translation import ungettext + +from mayan.apps.acls.models import AccessControlList +from mayan.apps.common.views import ( + MultipleObjectConfirmActionView, SingleObjectEditView, + SingleObjectListView +) +from mayan.apps.documents.models import Document, DocumentType + +from .icons import icon_file_metadata +from .models import DocumentVersionDriverEntry +from .permissions import ( + permission_document_type_file_metadata_setup, + permission_file_metadata_submit, permission_file_metadata_view +) + + +class DocumentDriverListView(SingleObjectListView): + def get_extra_context(self): + return
{ + 'hide_object': True, + 'no_results_icon': icon_file_metadata, + 'no_results_text': _( + 'File metadata are the attributes of the document\'s file. ' + 'They can range from camera information used to take a photo ' + 'to the author that created a file. File metadata are set ' + 'when the document\'s file was first created. File metadata ' + 'attributes reside in the file itself. They are not the ' + 'same as the document metadata, which are user defined and ' + 'reside in the database.' + ), + 'no_results_title': _('No file metadata available.'), + 'object': self.get_object(), + 'title': _( + 'File metadata drivers for: %s' + ) % self.get_object(), + } + + def get_object(self): + document = get_object_or_404(klass=Document, pk=self.kwargs['pk']) + AccessControlList.objects.check_access( + permissions=permission_file_metadata_view, + user=self.request.user, obj=document + ) + return document + + def get_object_list(self): + return self.get_object().latest_version.file_metadata_drivers.all() + + +class DocumentVersionDriverEntryFileMetadataListView(SingleObjectListView): + def get_extra_context(self): + return { + 'hide_object': True, + 'no_results_title': _('No file metadata available.'), + 'object': self.get_object().document_version.document, + 'title': _( + 'File metadata attributes for: %(document)s, for driver: %(driver)s' + ) % { + 'document': self.get_object().document_version.document, + 'driver': self.get_object().driver + }, + } + + def get_object(self): + document_version_driver_entry = get_object_or_404( + klass=DocumentVersionDriverEntry, pk=self.kwargs['pk'] + ) + AccessControlList.objects.check_access( + obj=document_version_driver_entry.document_version, + permissions=permission_file_metadata_view, + user=self.request.user, + ) + return document_version_driver_entry + + def get_object_list(self): + return self.get_object().entries.all() + + +class DocumentSubmitView(MultipleObjectConfirmActionView): + model = Document + object_permission =
permission_file_metadata_submit + success_message = '%(count)d document submitted to the file metadata queue.' + success_message_plural = '%(count)d documents submitted to the file metadata queue.' + + def get_extra_context(self): + queryset = self.get_queryset() + + result = { + 'title': ungettext( + 'Submit the selected document to the file metadata queue?', + 'Submit the selected documents to the file metadata queue?', + queryset.count() + ) + } + + return result + + def object_action(self, form, instance): + instance.submit_for_file_metadata_processing() + + +class DocumentTypeSettingsEditView(SingleObjectEditView): + fields = ('auto_process',) + object_permission = permission_document_type_file_metadata_setup + post_action_redirect = reverse_lazy('documents:document_type_list') + + def get_document_type(self): + return get_object_or_404(DocumentType, pk=self.kwargs['pk']) + + def get_extra_context(self): + return { + 'object': self.get_document_type(), + 'title': _( + 'Edit file metadata settings for document type: %s' + ) % self.get_document_type() + } + + def get_object(self, queryset=None): + return self.get_document_type().file_metadata_settings diff --git a/mayan/settings/base.py b/mayan/settings/base.py index 88d5d6d98a..9104a74664 100644 --- a/mayan/settings/base.py +++ b/mayan/settings/base.py @@ -105,6 +105,7 @@ INSTALLED_APPS = ( 'mayan.apps.document_signatures', 'mayan.apps.document_states', 'mayan.apps.documents', + 'mayan.apps.file_metadata', 'mayan.apps.linking', 'mayan.apps.mailer', 'mayan.apps.mayan_statistics', diff --git a/mayan/settings/testing/base.py b/mayan/settings/testing/base.py index a32a38ba5c..b96a5660bb 100644 --- a/mayan/settings/testing/base.py +++ b/mayan/settings/testing/base.py @@ -2,20 +2,43 @@ from __future__ import absolute_import, unicode_literals from .. 
import * # NOQA +CELERY_TASK_ALWAYS_EAGER = True +CELERY_TASK_EAGER_PROPAGATES = True + +COMMON_PRODUCTION_ERROR_LOG_PATH = '/tmp/mayan-errors.log' + +DOCUMENT_PARSING_AUTO_PARSING = False + +FILE_METADATA_AUTO_PROCESS = False + INSTALLED_APPS += ('test_without_migrations',) INSTALLED_APPS = [ cls for cls in INSTALLED_APPS if cls != 'whitenoise.runserver_nostatic' ] -COMMON_PRODUCTION_ERROR_LOG_PATH = '/tmp/mayan-errors.log' - # Remove whitenoise from middlewares. Causes out of memory errors during test # suit MIDDLEWARE = [ cls for cls in MIDDLEWARE if cls != 'whitenoise.middleware.WhiteNoiseMiddleware' ] +# Remove middlewares not used for tests +MIDDLEWARE = [ + cls for cls in MIDDLEWARE if cls not in [ + 'common.middleware.error_logging.ErrorLoggingMiddleware', + 'django.middleware.security.SecurityMiddleware', + 'corsheaders.middleware.CorsMiddleware', + 'django.middleware.csrf.CsrfViewMiddleware', + 'django.middleware.clickjacking.XFrameOptionsMiddleware', + 'django.middleware.locale.LocaleMiddleware', + 'common.middleware.timezone.TimezoneMiddleware', + 'common.middleware.ajax_redirect.AjaxRedirect', + ] +] + +OCR_AUTO_OCR = False + # User a simpler password hasher PASSWORD_HASHERS = ( 'django.contrib.auth.hashers.MD5PasswordHasher', @@ -32,20 +55,3 @@ TEMPLATES[0]['OPTIONS']['loaders'] = ( ) ), ) - -CELERY_TASK_ALWAYS_EAGER = True -CELERY_TASK_EAGER_PROPAGATES = True - -# Remove middlewares not used for tests -MIDDLEWARE = [ - cls for cls in MIDDLEWARE if cls not in [ - 'common.middleware.error_logging.ErrorLoggingMiddleware', - 'django.middleware.security.SecurityMiddleware', - 'corsheaders.middleware.CorsMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.middleware.clickjacking.XFrameOptionsMiddleware', - 'django.middleware.locale.LocaleMiddleware', - 'common.middleware.timezone.TimezoneMiddleware', - 'common.middleware.ajax_redirect.AjaxRedirect', - ] -]