Split metadata validators into validators and parsers. Move document and upload wizard metadata validation logic to the model. Add metadata tests.

This commit is contained in:
Roberto Rosario
2015-08-20 04:37:49 -04:00
parent cc2927c4cd
commit 9599a3f8ab
11 changed files with 336 additions and 98 deletions

View File

@@ -6,7 +6,9 @@ from .models import MetadataType
class MetadataTypeAdmin(admin.ModelAdmin):
list_display = ('name', 'label', 'default', 'lookup', 'validation')
list_display = (
'name', 'label', 'default', 'lookup', 'validation', 'parser'
)
admin.site.register(MetadataType, MetadataTypeAdmin)

View File

@@ -10,65 +10,20 @@ from django.utils.module_loading import import_string
from django.utils.translation import string_concat, ugettext_lazy as _
from .classes import MetadataLookup
from .models import MetadataType
from .models import DocumentMetadata, MetadataType
class MetadataForm(forms.Form):
@staticmethod
def comma_splitter(string):
splitter = shlex.shlex(string.encode('utf-8'), posix=True)
splitter.whitespace = ','.encode('utf-8')
splitter.whitespace_split = True
splitter.commenters = ''.encode('utf-8')
return list(splitter)
def clean_value(self):
metadata_type = MetadataType.objects.get(pk=self.cleaned_data['id'])
if metadata_type.validation:
validator = import_string(metadata_type.validation)()
try:
# If it is a parsing function we should get a value
# If it is a validation function we get nothing on success
result = validator.validate(self.cleaned_data['value'])
except Exception as exception:
# If it is a validation function and an exception is raise
# we wrap that into a new ValidationError exception
# If the function exception is a ValidationError itself the
# error messages will be in a 'messages' property, so we
# contatenate them.
# Otherwise we extract whatever single message the exception
# included.
try:
message = ', '.join(exception.messages)
except AttributeError:
message = unicode(exception)
raise ValidationError(
_('Invalid value: %(message)s'), params={
'message': message
}, code='invalid'
)
else:
# Return the result if it was a parsing function
# If it was a validation function and passed correctly
# we return the original input value
return result or self.cleaned_data['value']
else:
# If a validator was never specified we return the original
# value
return self.cleaned_data['value']
def __init__(self, *args, **kwargs):
super(MetadataForm, self).__init__(*args, **kwargs)
# Set form fields initial values
if 'initial' in kwargs:
self.metadata_type = kwargs['initial'].pop('metadata_type', None)
required = kwargs['initial'].pop('required', False)
self.document_type = kwargs['initial'].pop('document_type', None)
required_string = ''
required = self.metadata_type.get_required_for(document_type=self.document_type)
if required:
self.fields['value'].required = True
required_string = ' (%s)' % _('Required')
@@ -85,14 +40,10 @@ class MetadataForm(forms.Form):
if self.metadata_type.lookup:
try:
template = Template(self.metadata_type.lookup)
context = Context(MetadataLookup.get_as_context())
choices = MetadataForm.comma_splitter(
template.render(context=context)
)
self.fields['value'] = forms.ChoiceField(
label=self.fields['value'].label
)
choices = self.metadata_type.get_lookup_values()
choices = zip(choices, choices)
if not required:
choices.insert(0, ('', '------'))
@@ -108,10 +59,9 @@ class MetadataForm(forms.Form):
if self.metadata_type.default:
try:
template = Template(self.metadata_type.default)
context = Context()
result = template.render(context=context)
self.fields['value'].initial = result
self.fields[
'value'
].initial = self.metadata_type.get_default_value()
except Exception as exception:
self.fields['value'].initial = _(
'Default value error: %s'
@@ -120,6 +70,9 @@ class MetadataForm(forms.Form):
attrs={'readonly': 'readonly'}
)
def clean_value(self):
    """
    Clean the ``value`` field by delegating to the metadata type.

    Returns whatever ``validate_value`` of this form's metadata type
    returns for the submitted value and this form's document type. Any
    exception raised by that call propagates to the caller.
    """
    validate = self.metadata_type.validate_value
    return validate(
        document_type=self.document_type, value=self.cleaned_data['value']
    )
id = forms.CharField(label=_('ID'), widget=forms.HiddenInput)
name = forms.CharField(
@@ -147,7 +100,7 @@ class AddMetadataForm(forms.Form):
class MetadataTypeForm(forms.ModelForm):
class Meta:
fields = ('name', 'label', 'default', 'lookup', 'validation')
fields = ('name', 'label', 'default', 'lookup', 'validation', 'parser')
model = MetadataType
def __init__(self, *args, **kwargs):

View File

@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import models, migrations
class Migration(migrations.Migration):
    # Adds the ``parser`` column to the ``metadatatype`` model and updates
    # the help text, verbose name and choices of its ``validation`` column.
    #
    # NOTE(review): the ``parser`` choices recorded below are validator
    # import paths, not parser ones. Presumably a snapshot of the settings
    # when the migration was generated; confirm before relying on them.

    dependencies = [
        ('metadata', '0005_auto_20150729_2344'),
    ]

    operations = [
        migrations.AddField(
            model_name='metadatatype',
            name='parser',
            field=models.CharField(
                blank=True,
                help_text='The parser will reformat the value entered to conform to the expected format.',
                max_length=64,
                verbose_name='Parser',
                choices=[
                    (b'metadata.validators.DateAndTimeValidator', b'metadata.validators.DateAndTimeValidator'),
                    (b'metadata.validators.DateValidator', b'metadata.validators.DateValidator'),
                    (b'metadata.validators.TimeValidator', b'metadata.validators.TimeValidator'),
                ]
            ),
            preserve_default=True,
        ),
        migrations.AlterField(
            model_name='metadatatype',
            name='validation',
            field=models.CharField(
                blank=True,
                help_text='The validator will reject data entry if the value entered does not conform to the expected format.',
                max_length=64,
                verbose_name='Validator',
                choices=[
                    (b'metadata.validators.DateAndTimeValidator', b'metadata.validators.DateAndTimeValidator'),
                    (b'metadata.validators.DateValidator', b'metadata.validators.DateValidator'),
                    (b'metadata.validators.TimeValidator', b'metadata.validators.TimeValidator'),
                ]
            ),
            preserve_default=True,
        ),
    ]

View File

@@ -1,14 +1,19 @@
from __future__ import unicode_literals
import shlex
from django.core.exceptions import ValidationError
from django.db import models
from django.template import Context, Template
from django.utils.encoding import python_2_unicode_compatible
from django.utils.module_loading import import_string
from django.utils.translation import ugettext_lazy as _
from documents.models import Document, DocumentType
from .classes import MetadataLookup
from .managers import MetadataTypeManager
from .settings import setting_available_validators
from .settings import setting_available_parsers, setting_available_validators
def validation_choices():
@@ -18,11 +23,27 @@ def validation_choices():
)
def parser_choices():
    """
    Build the ``choices`` pairs for the ``parser`` field.

    Every entry of the ``setting_available_parsers`` setting is paired
    with itself, so the same import path is both the stored value and the
    label shown to the user.
    """
    stored_values = setting_available_parsers.value
    display_labels = setting_available_parsers.value
    return zip(stored_values, display_labels)
@python_2_unicode_compatible
class MetadataType(models.Model):
"""
Define a type of metadata
"""
@staticmethod
def comma_splitter(string):
    """
    Split a comma separated string into a list of tokens.

    Splitting is done by ``shlex`` in POSIX mode, so a quoted token may
    itself contain commas. The comma is the only separator: spaces stay
    part of the tokens and no character starts a comment.
    """
    # ``str`` is the native string type shlex reads on the running
    # interpreter: bytes on Python 2, text on Python 3. Encode only when
    # given the other kind (a Python 2 ``unicode``). Encoding
    # unconditionally hands Python 3's shlex a bytes object it cannot read.
    if not isinstance(string, str):
        string = string.encode('utf-8')

    splitter = shlex.shlex(string, posix=True)
    # str() keeps these settings native strings in spite of the module's
    # ``unicode_literals`` import.
    splitter.whitespace = str(',')
    splitter.whitespace_split = True
    splitter.commenters = str('')
    return list(splitter)
name = models.CharField(
max_length=48,
help_text=_(
@@ -52,9 +73,19 @@ class MetadataType(models.Model):
verbose_name=_('Lookup')
)
validation = models.CharField(
blank=True, choices=validation_choices(), max_length=64,
verbose_name=_('Validation function name')
blank=True, choices=validation_choices(),
help_text=_(
'The validator will reject data entry if the value entered does '
'not conform to the expected format.'
), max_length=64, verbose_name=_('Validator')
)
parser = models.CharField(
blank=True, choices=parser_choices(), help_text=_(
'The parser will reformat the value entered to conform to the '
'expected format.'
), max_length=64, verbose_name=_('Parser')
)
objects = MetadataTypeManager()
def __str__(self):
@@ -63,6 +94,49 @@ class MetadataType(models.Model):
def natural_key(self):
return (self.name,)
def get_default_value(self):
    """
    Render the ``default`` field as a template and return the result.

    The template is rendered with an empty context.
    """
    return Template(self.default).render(context=Context())
def get_lookup_values(self):
    """
    Return the list of options produced by the ``lookup`` field.

    The field is rendered as a template, using the context supplied by
    ``MetadataLookup.get_as_context()``, and the rendered text is split
    with ``MetadataType.comma_splitter``.
    """
    lookup_context = Context(MetadataLookup.get_as_context())
    rendered_options = Template(self.lookup).render(context=lookup_context)
    return MetadataType.comma_splitter(rendered_options)
def get_required_for(self, document_type):
    """
    Tell whether this metadata type is required for ``document_type``.

    ``document_type.metadata`` holds relation rows that expose a
    ``metadata_type`` and a ``required`` flag, so the match has to be made
    on the row's ``metadata_type``. Testing ``self in <rows>`` would
    compare this instance against the rows themselves instead of against
    the metadata type each row points to.

    Returns True when a required relation row exists for this metadata
    type, False otherwise.
    """
    return document_type.metadata.filter(
        required=True, metadata_type=self
    ).exists()
def validate_value(self, document_type, value):
    """
    Validate and normalize ``value`` for this metadata type.

    Steps, in order: fall back to the default when the value is empty,
    enforce the required flag for ``document_type``, check a non-empty
    value against the lookup options, run the validator and finally pass
    the value through the parser.

    Returns the value, possibly replaced by the default and/or reformatted
    by the parser.

    Raises ValidationError, keyed on ``value``, when a required value is
    missing or when the value is not one of the lookup options. Exceptions
    raised by the validator or the parser propagate unchanged.
    """
    # Check default
    if not value and self.default:
        value = self.get_default_value()

    if not value and self.get_required_for(document_type=document_type):
        raise ValidationError(
            {
                'value': _(
                    'This metadata is required for this document type.'
                )
            }
        )

    # A value that is still empty here belongs to an optional metadata
    # type left blank (a missing required value was rejected above), so
    # there is nothing to look up.
    if value and self.lookup:
        lookup_options = self.get_lookup_values()

        if value not in lookup_options:
            raise ValidationError(
                {'value': _('Value is not one of the provided options.')}
            )

    if self.validation:
        validator = import_string(self.validation)()
        validator.validate(value)

    if self.parser:
        parser = import_string(self.parser)()
        value = parser.parse(value)

    return value
class Meta:
ordering = ('label',)
verbose_name = _('Metadata type')
@@ -87,14 +161,6 @@ class DocumentMetadata(models.Model):
def __str__(self):
return unicode(self.metadata_type)
def save(self, *args, **kwargs):
if self.metadata_type.pk not in self.document.document_type.metadata.values_list('metadata_type', flat=True):
raise ValidationError(
_('Metadata type is not valid for this document type.')
)
return super(DocumentMetadata, self).save(*args, **kwargs)
def delete(self, enforce_required=True, *args, **kwargs):
if enforce_required and self.metadata_type.pk in self.document.document_type.metadata.filter(required=True).values_list('metadata_type', flat=True):
raise ValidationError(
@@ -103,9 +169,26 @@ class DocumentMetadata(models.Model):
return super(DocumentMetadata, self).delete(*args, **kwargs)
def save(self, *args, **kwargs):
if self.metadata_type.pk not in self.document.document_type.metadata.values_list('metadata_type', flat=True):
raise ValidationError(
_('Metadata type is not valid for this document type.')
)
return super(DocumentMetadata, self).save(*args, **kwargs)
def clean_fields(self, *args, **kwargs):
    """
    Run the stock field cleaning, then validate the metadata value.

    ``self.value`` is replaced by whatever ``validate_value`` of the
    metadata type returns for the document's document type. Exceptions
    raised by that call propagate to the caller.
    """
    super(DocumentMetadata, self).clean_fields(*args, **kwargs)

    validate = self.metadata_type.validate_value
    cleaned_value = validate(
        document_type=self.document.document_type, value=self.value
    )
    self.value = cleaned_value
@property
def is_required(self):
return self.metadata_type in self.document.document_type.metadata.filter(required=True)
return self.metadata_type.get_required_for(
document_type=self.document.document_type
)
class Meta:
unique_together = ('document', 'metadata_type')

View File

@@ -0,0 +1,54 @@
from __future__ import unicode_literals
from dateutil.parser import parse
from django.core.exceptions import ValidationError
class MetadataParser(object):
    """
    Base class and registry for metadata parsers.

    Subclasses implement ``execute``; callers use ``parse``, which reports
    any failure of ``execute`` as a ValidationError. Parser classes are
    collected with ``register`` and can be listed either as classes or as
    dotted import paths.
    """
    # Registered classes, in registration order. A subclass uses this same
    # list unless it defines a ``_registry`` of its own.
    _registry = []

    @classmethod
    def register(cls, parser):
        """Append the ``parser`` class to the registry."""
        cls._registry.append(parser)

    @classmethod
    def get_all(cls):
        """Return the list of registered classes."""
        return cls._registry

    @classmethod
    def get_import_path(cls):
        """Return the dotted ``module.ClassName`` path of this class."""
        return '.'.join((cls.__module__, cls.__name__))

    @classmethod
    def get_import_paths(cls):
        """Return the import path of every registered class."""
        import_paths = []
        for registered in cls.get_all():
            import_paths.append(registered.get_import_path())
        return import_paths

    def execute(self, input_data):
        """Transform ``input_data``; must be provided by subclasses."""
        raise NotImplementedError

    def parse(self, input_data):
        """
        Return the result of ``execute`` for ``input_data``.

        Any exception raised by ``execute`` is re-raised wrapped in a
        ValidationError.
        """
        try:
            result = self.execute(input_data)
        except Exception as exception:
            raise ValidationError(exception)
        else:
            return result
class DateAndTimeParser(MetadataParser):
    """Reformat a date and time string with ``datetime.isoformat()``."""

    def execute(self, input_data):
        parsed = parse(input_data)
        return parsed.isoformat()
class DateParser(MetadataParser):
    """Reformat a date string, keeping only the date, in ISO format."""

    def execute(self, input_data):
        parsed = parse(input_data)
        return parsed.date().isoformat()
class TimeParser(MetadataParser):
    """Reformat a time string, keeping only the time, in ISO format."""

    def execute(self, input_data):
        parsed = parse(input_data)
        return parsed.time().isoformat()
# Add the parsers defined in this module to the MetadataParser registry so
# that MetadataParser.get_all() and MetadataParser.get_import_paths() list
# them.
MetadataParser.register(DateAndTimeParser)
MetadataParser.register(DateParser)
MetadataParser.register(TimeParser)

View File

@@ -4,6 +4,7 @@ from django.utils.translation import ugettext_lazy as _
from smart_settings import Namespace
from .parsers import MetadataParser
from .validators import MetadataValidator
namespace = Namespace(name='metadata', label=_('Metadata'))
@@ -11,3 +12,7 @@ setting_available_validators = namespace.add_setting(
global_name='METADATA_AVAILABLE_VALIDATORS',
default=MetadataValidator.get_import_paths()
)
setting_available_parsers = namespace.add_setting(
global_name='METADATA_AVAILABLE_PARSERS',
default=MetadataParser.get_import_paths()
)

View File

View File

@@ -0,0 +1,126 @@
from __future__ import unicode_literals
from django.core.files.base import File
from django.core.exceptions import ValidationError
from django.test import TestCase
from documents.models import DocumentType
from documents.tests import TEST_SMALL_DOCUMENT_PATH, TEST_DOCUMENT_TYPE
from ..models import MetadataType, DocumentMetadata, DocumentTypeMetadataType
TEST_DEFAULT_VALUE = 'test'
TEST_LOOKUP_TEMPLATE = '1,2,3'
TEST_INCORRECT_LOOKUP_VALUE = '0'
TEST_CORRECT_LOOKUP_VALUE = '1'
TEST_DATE_VALIDATOR = 'metadata.validators.DateValidator'
TEST_DATE_PARSER = 'metadata.parsers.DateParser'
TEST_INVALID_DATE = '___________'
TEST_VALID_DATE = '2001-1-1'
TEST_PARSED_VALID_DATE = '2001-01-01'
class MetadataTestCase(TestCase):
    """
    Exercise the default, lookup, validator and parser handling of document
    metadata through ``DocumentMetadata.full_clean()`` and ``save()``.
    """

    def setUp(self):
        self.metadata_type = MetadataType.objects.create(
            name='test', label='test'
        )

        self.document_type = DocumentType.objects.create(
            label=TEST_DOCUMENT_TYPE
        )

        # Switch off the document type's ``auto_ocr`` flag before the test
        # document is created.
        ocr_settings = self.document_type.ocr_settings
        ocr_settings.auto_ocr = False
        ocr_settings.save()

        self.document_type.metadata.create(metadata_type=self.metadata_type)

        # Binary mode: the document's bytes must be handed over untouched,
        # without newline translation or text decoding.
        with open(TEST_SMALL_DOCUMENT_PATH, 'rb') as file_object:
            self.document = self.document_type.new_document(
                file_object=File(file_object)
            )

    def tearDown(self):
        self.document_type.delete()
        self.metadata_type.delete()

    def test_no_default(self):
        document_metadata = DocumentMetadata(
            document=self.document, metadata_type=self.metadata_type
        )

        document_metadata.full_clean()
        document_metadata.save()

        self.assertEqual(self.document.metadata_value_of.test, None)

    def test_default(self):
        self.metadata_type.default = TEST_DEFAULT_VALUE
        self.metadata_type.save()

        document_metadata = DocumentMetadata(
            document=self.document, metadata_type=self.metadata_type
        )

        document_metadata.full_clean()
        document_metadata.save()

        self.assertEqual(
            self.document.metadata_value_of.test, TEST_DEFAULT_VALUE
        )

    def test_lookup(self):
        # The metadata type is changed in memory only; the document
        # metadata below holds this same instance.
        self.metadata_type.lookup = TEST_LOOKUP_TEMPLATE

        document_metadata = DocumentMetadata(
            document=self.document, metadata_type=self.metadata_type,
            value=TEST_INCORRECT_LOOKUP_VALUE
        )

        with self.assertRaises(ValidationError):
            # Should return error
            document_metadata.full_clean()
            document_metadata.save()

        # Should not return error
        document_metadata.value = TEST_CORRECT_LOOKUP_VALUE
        document_metadata.full_clean()
        document_metadata.save()

        self.assertEqual(
            self.document.metadata_value_of.test, TEST_CORRECT_LOOKUP_VALUE
        )

    def test_validation(self):
        self.metadata_type.validation = TEST_DATE_VALIDATOR

        document_metadata = DocumentMetadata(
            document=self.document, metadata_type=self.metadata_type,
            value=TEST_INVALID_DATE
        )

        with self.assertRaises(ValidationError):
            # Should return error
            document_metadata.full_clean()
            document_metadata.save()

        # Should not return error
        document_metadata.value = TEST_VALID_DATE
        document_metadata.full_clean()
        document_metadata.save()

        self.assertEqual(
            self.document.metadata_value_of.test, TEST_VALID_DATE
        )

    def test_parsing(self):
        self.metadata_type.parser = TEST_DATE_PARSER

        document_metadata = DocumentMetadata(
            document=self.document, metadata_type=self.metadata_type,
            value=TEST_INVALID_DATE
        )

        with self.assertRaises(ValidationError):
            # Should return error
            document_metadata.full_clean()
            document_metadata.save()

        # Should not return error
        document_metadata.value = TEST_VALID_DATE
        document_metadata.full_clean()
        document_metadata.save()

        self.assertEqual(
            self.document.metadata_value_of.test, TEST_PARSED_VALID_DATE
        )

View File

@@ -2,42 +2,33 @@ from __future__ import unicode_literals
from dateutil.parser import parse
from django.core.exceptions import ValidationError
class MetadataValidator(object):
from .parsers import MetadataParser
class MetadataValidator(MetadataParser):
_registry = []
@classmethod
def register(cls, parser):
cls._registry.append(parser)
@classmethod
def get_all(cls):
return cls._registry
@classmethod
def get_import_path(cls):
return cls.__module__ + '.' + cls.__name__
@classmethod
def get_import_paths(cls):
return [validator.get_import_path() for validator in cls.get_all()]
def parse(self, input_data):
raise NotImplementedError
def validate(self, input_data):
try:
self.execute(input_data)
except Exception as exception:
raise ValidationError(exception)
class DateAndTimeValidator(MetadataValidator):
def validate(self, input_data):
def execute(self, input_data):
return parse(input_data).isoformat()
class DateValidator(MetadataValidator):
def validate(self, input_data):
def execute(self, input_data):
return parse(input_data).date().isoformat()
class TimeValidator(MetadataValidator):
def validate(self, input_data):
def execute(self, input_data):
return parse(input_data).time().isoformat()

View File

@@ -121,11 +121,9 @@ def metadata_edit(request, document_id=None, document_id_list=None):
for key, value in metadata.items():
initial.append({
'document_type': document.document_type,
'metadata_type': key,
'value': ', '.join(value) if value else '',
'required': key in document.document_type.metadata.filter(
required=True
),
})
formset = MetadataFormSet(initial=initial)

View File

@@ -16,7 +16,7 @@ from .models import InteractiveSource
class DocumentCreateWizard(ViewPermissionCheckMixin, SessionWizardView):
form_list = [DocumentTypeSelectForm, MetadataFormSet]
form_list = (DocumentTypeSelectForm, MetadataFormSet)
template_name = 'appearance/generic_wizard.html'
extra_context = {}
view_permission = permission_document_create
@@ -54,8 +54,8 @@ class DocumentCreateWizard(ViewPermissionCheckMixin, SessionWizardView):
for document_type_metadata_type in self.get_cleaned_data_for_step('0')['document_type'].metadata.all():
initial.append({
'document_type': self.get_cleaned_data_for_step('0')['document_type'],
'metadata_type': document_type_metadata_type.metadata_type,
'required': document_type_metadata_type.required,
})
return initial