Finished adding metadata based groups

This commit is contained in:
Roberto Rosario
2011-02-12 01:57:24 -04:00
parent c83c2e0d46
commit f3fab1b7d9
8 changed files with 89 additions and 63 deletions

View File

@@ -19,7 +19,7 @@ Features
* User defined document checksum algorithm * User defined document checksum algorithm
* Previews for a great deal of image formats, including PDF * Previews for a great deal of image formats, including PDF
* Document OCR and searching * Document OCR and searching
* Group documents by metadata automatically
Requirements Requirements
--- ---

View File

@@ -6,7 +6,7 @@
{% if side_bar %} {% if side_bar %}
<div class="block"> <div class="block">
<h3> <h3>
{{ title }} {{ title|capfirst }}
</h3> </h3>
<div class="content"> <div class="content">
<p> <p>

View File

@@ -39,6 +39,10 @@ PREVIEW_SIZE = getattr(settings, 'DOCUMENTS_PREVIEW_SIZE', '640x480')
THUMBNAIL_SIZE = getattr(settings, 'DOCUMENTS_THUMBNAIL_SIZE', '50x50') THUMBNAIL_SIZE = getattr(settings, 'DOCUMENTS_THUMBNAIL_SIZE', '50x50')
DISPLAY_SIZE = getattr(settings, 'DOCUMENTS_DISPLAY_SIZE', '1024x768') DISPLAY_SIZE = getattr(settings, 'DOCUMENTS_DISPLAY_SIZE', '1024x768')
# Groups
GROUP_MAX_RESULTS = getattr(settings, 'DOCUMENTS_GROUP_MAX_RESULTS', 20)
GROUP_SHOW_EMPTY = getattr(settings, 'DOCUMENTS_GROUP_SHOW_EMPTY', True)
# Serving # Serving
FILESYSTEM_FILESERVING_ENABLE = getattr(settings, 'DOCUMENTS_FILESYSTEM_FILESERVING_ENABLE', True) FILESYSTEM_FILESERVING_ENABLE = getattr(settings, 'DOCUMENTS_FILESYSTEM_FILESERVING_ENABLE', True)
FILESYSTEM_FILESERVING_PATH = getattr(settings, 'DOCUMENTS_FILESYSTEM_FILESERVING_PATH', u'/tmp/mayan/documents') FILESYSTEM_FILESERVING_PATH = getattr(settings, 'DOCUMENTS_FILESYSTEM_FILESERVING_PATH', u'/tmp/mayan/documents')

View File

@@ -10,7 +10,8 @@ from django.db import models
from django.template.defaultfilters import slugify from django.template.defaultfilters import slugify
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from django.utils.translation import ugettext from django.utils.translation import ugettext
from django.db.models import Q
from dynamic_search.api import register from dynamic_search.api import register
from documents.conf.settings import AVAILABLE_FUNCTIONS from documents.conf.settings import AVAILABLE_FUNCTIONS
@@ -110,6 +111,36 @@ class Document(models.Model):
#topics/db/queries.html#topics-db-queries-delete #topics/db/queries.html#topics-db-queries-delete
self.delete_fs_links() self.delete_fs_links()
super(Document, self).delete(*args, **kwargs) super(Document, self).delete(*args, **kwargs)
def get_metadata_groups(self):
    """
    Find the documents related to this one through metadata groups.

    Every enabled MetadataGroup whose document type matches this
    document's type (or that has no document type) is processed. For
    each enabled item of the group, the item's expression is evaluated
    with this document's metadata available as ``metadata_<name>``
    variables, and the result is used as a ``value__<operator>`` lookup
    against DocumentMetadata rows of the item's metadata type. The item
    queries are combined according to each item's inclusion ('and' /
    'or'); the inclusion of the first item is ignored.

    Returns a ``(metadata_groups, errors)`` tuple:
      * ``metadata_groups`` maps each processed MetadataGroup to the
        matching documents (this document excluded), or to an empty
        list when nothing matches or the group has no enabled items.
      * ``errors`` is the list of exceptions raised while evaluating
        item expressions or building their lookups.
    """
    errors = []
    metadata_groups = {}
    if MetadataGroup.objects.all().count():
        # Namespace handed to eval(): this document's metadata values,
        # keyed by metadata type name with a "metadata_" prefix.
        metadata_dict = {}
        for document_metadata in self.documentmetadata_set.all():
            metadata_dict['metadata_%s' % document_metadata.metadata_type.name] = document_metadata.value

        for group in MetadataGroup.objects.filter((Q(document_type=self.document_type) | Q(document_type=None)) & Q(enabled=True)):
            # None means "no item processed yet"; the first item seeds
            # the query so its inclusion operator is never applied.
            total_query = None
            for item in group.metadatagroupitem_set.filter(enabled=True):
                try:
                    # NOTE(review): eval() executes the expression stored
                    # on the group item as Python code; make sure only
                    # trusted users can edit metadata group items.
                    value_query = Q(**{'value__%s' % item.operator: eval(item.expression, metadata_dict)})
                except Exception as e:
                    # Best effort: report the failure to the caller and
                    # fall back to matching on the metadata type alone.
                    errors.append(e)
                    value_query = Q()

                if item.negated:
                    query = (Q(metadata_type__id=item.metadata_type.id) & ~value_query)
                else:
                    query = (Q(metadata_type__id=item.metadata_type.id) & value_query)

                if total_query is None:
                    total_query = query
                elif item.inclusion == INCLUSION_AND:
                    total_query &= query
                elif item.inclusion == INCLUSION_OR:
                    total_query |= query

            if total_query is None:
                # A group without enabled items cannot match anything.
                metadata_groups[group] = []
                continue

            # Filter with the combined query of all items, not just the
            # query of the last item processed.
            document_id_list = DocumentMetadata.objects.filter(total_query).values_list('document', flat=True)
            metadata_groups[group] = Document.objects.filter(Q(id__in=document_id_list) & ~Q(id=self.id)) or []
    return metadata_groups, errors
def create_fs_links(self): def create_fs_links(self):
if FILESYSTEM_FILESERVING_ENABLE: if FILESYSTEM_FILESERVING_ENABLE:
@@ -326,6 +357,7 @@ class MetadataGroup(models.Model):
verbose_name=_(u'document type'), help_text=_(u'If left blank, all document types will be matched.')) verbose_name=_(u'document type'), help_text=_(u'If left blank, all document types will be matched.'))
name = models.CharField(max_length=32, verbose_name=_(u'name')) name = models.CharField(max_length=32, verbose_name=_(u'name'))
label = models.CharField(max_length=32, verbose_name=_(u'label')) label = models.CharField(max_length=32, verbose_name=_(u'label'))
enabled = models.BooleanField(default=True, verbose_name=_(u'enabled'))
def __unicode__(self): def __unicode__(self):
return self.label if self.label else self.name return self.label if self.label else self.name
@@ -335,7 +367,6 @@ class MetadataGroup(models.Model):
verbose_name_plural = _(u'metadata document groups') verbose_name_plural = _(u'metadata document groups')
INCLUSION_AND = '&' INCLUSION_AND = '&'
INCLUSION_OR = '|' INCLUSION_OR = '|'
@@ -344,24 +375,36 @@ INCLUSION_CHOICES = (
(INCLUSION_OR, _(u'or')), (INCLUSION_OR, _(u'or')),
) )
OPERATOR_EQUAL = ' '
OPERATOR_IS_NOT_EQUAL = '~'
OPERATOR_CHOCIES = ( OPERATOR_CHOCIES = (
(OPERATOR_EQUAL, _(u'is equal')), ('exact', _(u'is equal')),
(OPERATOR_IS_NOT_EQUAL, _(u'is not equal')), ('iexact', _(u'is equal (case insensitive)')),
('contains', _(u'contains')),
('icontains', _(u'contains (case insensitive)')),
('in', _(u'is in')),
('gt', _(u'is greater than')),
('gte', _(u'is greater than or equal')),
('lt', _(u'is less than')),
('lte', _(u'is less than or equal')),
('startswith', _(u'starts with')),
('istartswith', _(u'starts with (case insensitive)')),
('endswith', _(u'ends with')),
('iendswith', _(u'ends with (case insensitive)')),
('regex', _(u'is in regular expression')),
('iregex', _(u'is in regular expression (case insensitive)')),
) )
class MetadataGroupItem(models.Model): class MetadataGroupItem(models.Model):
metadata_group = models.ForeignKey(MetadataGroup, verbose_name=_(u'metadata group')) metadata_group = models.ForeignKey(MetadataGroup, verbose_name=_(u'metadata group'))
inclusion = models.CharField(default=INCLUSION_AND, max_length=16, choices=INCLUSION_CHOICES) inclusion = models.CharField(default=INCLUSION_AND, max_length=16, choices=INCLUSION_CHOICES, help_text=_(u'The inclusion is ignored for the first item.'))
metadata_type = models.ForeignKey(MetadataType, verbose_name=_(u'metadata type'), help_text=_(u'This represents the metadata of all other documents.')) metadata_type = models.ForeignKey(MetadataType, verbose_name=_(u'metadata type'), help_text=_(u'This represents the metadata of all other documents.'))
operator = models.CharField(max_length=16, choices=OPERATOR_CHOCIES) operator = models.CharField(max_length=16, choices=OPERATOR_CHOCIES)
expression = models.CharField(max_length=64, expression = models.CharField(max_length=128,
verbose_name=_(u'expression'), help_text=_(u'This expression will be evaluated against the current seleted document. The document metadata is available as variables of the same name but with the "metadata_" prefix added their name.')) verbose_name=_(u'expression'), help_text=_(u'This expression will be evaluated against the current seleted document. The document metadata is available as variables of the same name but with the "metadata_" prefix added their name.'))
negated = models.BooleanField(default=False, verbose_name=_(u'negated'), help_text=_(u'Inverts the logic of the operator.'))
enabled = models.BooleanField(default=True, verbose_name=_(u'enabled'))
def __unicode__(self): def __unicode__(self):
return '%s %s %s %s' % (self.get_inclusion_display(), self.metadata_type, self.get_operator_display(), self.expression) return '[%s] %s %s %s %s %s' % ('x' if self.enabled else ' ', self.get_inclusion_display(), self.metadata_type, _(u'not') if self.negated else '', self.get_operator_display(), self.expression)
class Meta: class Meta:
verbose_name = _(u'metadata group item') verbose_name = _(u'metadata group item')

View File

@@ -32,6 +32,8 @@ from documents.conf.settings import FILESYSTEM_FILESERVING_ENABLE
from documents.conf.settings import STAGING_FILES_PREVIEW_SIZE from documents.conf.settings import STAGING_FILES_PREVIEW_SIZE
from documents.conf.settings import PREVIEW_SIZE from documents.conf.settings import PREVIEW_SIZE
from documents.conf.settings import THUMBNAIL_SIZE from documents.conf.settings import THUMBNAIL_SIZE
from documents.conf.settings import GROUP_MAX_RESULTS
from documents.conf.settings import GROUP_SHOW_EMPTY
from utils import save_metadata, save_metadata_list, decode_metadata_from_url from utils import save_metadata, save_metadata_list, decode_metadata_from_url
@@ -192,12 +194,6 @@ def upload_document_with_type(request, document_type_id, multiple=True):
return render_to_response('generic_form.html', context, return render_to_response('generic_form.html', context,
context_instance=RequestContext(request)) context_instance=RequestContext(request))
from django.db.models import Q
from models import MetadataGroup
from models import INCLUSION_AND, INCLUSION_OR, OPERATOR_EQUAL, OPERATOR_IS_NOT_EQUAL
def document_view(request, document_id): def document_view(request, document_id):
document = get_object_or_404(Document, pk=document_id) document = get_object_or_404(Document, pk=document_id)
form = DocumentForm_view(instance=document, extra_fields=[ form = DocumentForm_view(instance=document, extra_fields=[
@@ -213,43 +209,12 @@ def document_view(request, document_id):
{'label':_(u'UUID'), 'field':'uuid'}, {'label':_(u'UUID'), 'field':'uuid'},
]) ])
metadata_groups = {}
if MetadataGroup.objects.all().count(): metadata_groups, errors = document.get_metadata_groups()
metadata_dict = {} if request.user.is_staff and errors:
for document_metadata in document.documentmetadata_set.all(): for error in errors:
metadata_dict['metadata_%s' % document_metadata.metadata_type.name] = document_metadata.value messages.warning(request, _(u'Metadata group query error: %s' % error))
for group in MetadataGroup.objects.filter(Q(document_type=document.document_type) | Q(document_type=None)):
total_query = None
for count, item in enumerate(group.metadatagroupitem_set.all()):
try:
expression_result = eval(item.expression, metadata_dict)
if item.operator == OPERATOR_EQUAL:
value_query = Q(documentmetadata__value=expression_result)
elif item.operator == OPERATOR_IS_NOT_EQUAL:
value_query = ~Q(documentmetadata__value=expression_result)
query = (Q(documentmetadata__metadata_type__id=item.metadata_type.id) & value_query)
if count == 0:
total_query = query
else:
if item.inclusion == INCLUSION_AND:
total_query &= query
elif item.inclusion == INCLUSION_AND:
total_query |= query
except Exception, e:
if request.user.is_staff:
messages.warning(request, _(u'Metadata group query error: %s' % e))
else:
pass
if total_query:
print 'total_query',total_query
metadata_groups[group] = Document.objects.filter(total_query)
print 'documents',Document.objects.filter(total_query)
preview_form = DocumentPreviewForm(document=document) preview_form = DocumentPreviewForm(document=document)
form_list = [ form_list = [
{ {
@@ -284,13 +249,21 @@ def document_view(request, document_id):
sidebar_groups = [] sidebar_groups = []
for group, data in metadata_groups.items(): for group, data in metadata_groups.items():
sidebar_groups.append({ if len(data) or GROUP_SHOW_EMPTY:
'title':group.label, if len(data):
'name':'generic_list_subtemplate.html', if len(data) > GROUP_MAX_RESULTS:
'object_list':data, total_string = '(%s out of %s)' % (GROUP_MAX_RESULTS, len(data))
'hide_columns':True, else:
'hide_header':True, total_string = '(%s)' % len(data)
}) else:
total_string = ''
sidebar_groups.append({
'title':'%s %s' % (group.label, total_string),
'name':'generic_list_subtemplate.html',
'object_list':data[:GROUP_MAX_RESULTS],
'hide_columns':True,
'hide_header':True,
})
return render_to_response('generic_detail.html', { return render_to_response('generic_detail.html', {
'form_list':form_list, 'form_list':form_list,

View File

@@ -3,3 +3,4 @@
* Show only document metadata in document list view. * Show only document metadata in document list view.
* If one document type exists, the create document wizard skips the first step. * If one document type exists, the create document wizard skips the first step.
* Changed to a liquid css grid * Changed to a liquid css grid
* Added the ability to group documents by their metadata

View File

@@ -25,6 +25,7 @@
* Add css grids - DONE * Add css grids - DONE
* If there's only one document type on db skip step 1 of wizard - DONE * If there's only one document type on db skip step 1 of wizard - DONE
* Be able to delete staging file - DONE * Be able to delete staging file - DONE
* Group documents by metadata - DONE
* Document list filtering by metadata * Document list filtering by metadata
* Filterform date filtering widget * Filterform date filtering widget
* Validate GET data before saving file * Validate GET data before saving file
@@ -56,7 +57,6 @@
* Add unpaper to pre OCR document cleanup * Add unpaper to pre OCR document cleanup
* Support distributed OCR queues (RabbitMQ & Celery?) * Support distributed OCR queues (RabbitMQ & Celery?)
* DXF viewer - http://code.google.com/p/dxf-reader/source/browse/#svn%2Ftrunk * DXF viewer - http://code.google.com/p/dxf-reader/source/browse/#svn%2Ftrunk
* Group documents by metadata
* Support spreadsheets, wordprocessing docs using openoffice in server mode * Support spreadsheets, wordprocessing docs using openoffice in server mode
* WebDAV support * WebDAV support
* Handle zipped or rar archives * Handle zipped or rar archives

View File

@@ -191,6 +191,10 @@ LOGIN_EXEMPT_URLS = (
#DOCUMENTS_THUMBNAIL_SIZE = '50x50' #DOCUMENTS_THUMBNAIL_SIZE = '50x50'
#DOCUMENTS_DISPLAY_SIZE = '1024x768' #DOCUMENTS_DISPLAY_SIZE = '1024x768'
# Groups
#DOCUMENTS_GROUP_MAX_RESULTS = 20
#DOCUMENTS_GROUP_SHOW_EMPTY = True
# Serving # Serving
#DOCUMENTS_FILESYSTEM_FILESERVING_ENABLE = True #DOCUMENTS_FILESYSTEM_FILESERVING_ENABLE = True
#DOCUMENTS_FILESYSTEM_FILESERVING_PATH = u'/tmp/mayan/documents' #DOCUMENTS_FILESYSTEM_FILESERVING_PATH = u'/tmp/mayan/documents'
@@ -202,6 +206,7 @@ LOGIN_EXEMPT_URLS = (
#CONVERTER_CONVERT_PATH = u'/usr/bin/convert' #CONVERTER_CONVERT_PATH = u'/usr/bin/convert'
#CONVERTER_OCR_OPTIONS = u'-colorspace Gray -depth 8 -resample 200x200' #CONVERTER_OCR_OPTIONS = u'-colorspace Gray -depth 8 -resample 200x200'
#OCR_TESSERACT_PATH = u'/usr/bin/tesseract' #OCR_TESSERACT_PATH = u'/usr/bin/tesseract'
# Override # Override
SEARCH_SHOW_OBJECT_TYPE = False SEARCH_SHOW_OBJECT_TYPE = False
#======== End of configuration options ======= #======== End of configuration options =======