From 90606086ccf7ac0ca976384786ded61d696023b3 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 16 Mar 2011 02:02:16 -0400 Subject: [PATCH 1/9] Updated TODO --- docs/TODO | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/TODO b/docs/TODO index c22be6b14a..6651f6af00 100644 --- a/docs/TODO +++ b/docs/TODO @@ -111,7 +111,10 @@ Documents * Create 'simple view' document view for non technical users - DONE * Unify document form classes * Use document preview code for staging file also - DONE -* Delete physical file on delete method +* Delete physical file on delete method - DEFFERED (Not needed until Django 1.3) +* Receive documents via email +* Mobile version +* Exif to metadata convertion Filesystem serving ================== @@ -135,6 +138,7 @@ Convert * DXF viewer - http://code.google.com/p/dxf-reader/source/browse/#svn%2Ftrunk * Support spreadsheets, wordprocessing docs using openoffice in server mode * Cache.cleanup function to delete cached images when document hash changes +* Support ExactImage Storage ======= @@ -162,4 +166,6 @@ OCR * Two types of OCR nodes: thin, fat (thin = document file is passed serialize to node, fat = has direct access to document storage read document file) * Move document in queue (up, down, top, bottom) - +* Support ocropus +* Support cuneiform +* Implement StringIO From 85b0efcc5449ddece451175b7a6f0abee31e944f Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 16 Mar 2011 03:47:04 -0400 Subject: [PATCH 2/9] Small search optimization --- apps/dynamic_search/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/dynamic_search/api.py b/apps/dynamic_search/api.py index 0682ee4553..8b91f9a259 100644 --- a/apps/dynamic_search/api.py +++ b/apps/dynamic_search/api.py @@ -84,7 +84,7 @@ def perform_search(query_string): model_result_ids &= single_result_ids result_count += len(model_result_ids) - results = model.objects.filter(pk__in=model_result_ids)[:LIMIT] + results = 
model.objects.filter(pk__in=list(model_result_ids)[:LIMIT])[:LIMIT] shown_result_count += results.count() if results: model_list[data['text']] = results From d568719c187a7418f56f17e789af98a51674a0b3 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 16 Mar 2011 04:50:36 -0400 Subject: [PATCH 3/9] Get attributes for object or parent of object --- apps/common/templatetags/attribute_tags.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/apps/common/templatetags/attribute_tags.py b/apps/common/templatetags/attribute_tags.py index 2fafff9a97..cea6f1a49c 100644 --- a/apps/common/templatetags/attribute_tags.py +++ b/apps/common/templatetags/attribute_tags.py @@ -12,5 +12,9 @@ def object_property(value, arg): return return_attrib(value, arg) @register.filter -def get_model_list_columns(value): - return model_list_columns.get(type(value), []) +def get_model_list_columns(obj): + for key, value in model_list_columns.items(): + if isinstance(obj, key): + return value + + return [] From adeebfda4563e09ff54d1f09f28b320a77c9d932 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 16 Mar 2011 04:51:01 -0400 Subject: [PATCH 4/9] Updated TODO --- docs/TODO | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/TODO b/docs/TODO index 6651f6af00..c95c73dc2f 100644 --- a/docs/TODO +++ b/docs/TODO @@ -47,6 +47,11 @@ TODO, WISHLIST * Show current page in generic list template - DONE * Enable/disable ocr queue view & links - DONE +* Receive documents via email +* Mobile version +* Exif to metadata conversion +* Extract navigation code into new navigation app +* External portal using sites contrib app Main ==== @@ -112,9 +117,6 @@ Documents * Unify document form classes * Use document preview code for staging file also - DONE * Delete physical file on delete method - DEFFERED (Not needed until Django 1.3) -* Receive documents via email -* Mobile version -* Exif to metadata convertion Filesystem serving ================== From 
bc6bff206e4e6bd3864de4ec12a2a9411014c6d1 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 16 Mar 2011 04:51:15 -0400 Subject: [PATCH 5/9] Updated FAQ with database index information --- docs/FAQ | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/FAQ b/docs/FAQ index 083af1b599..4b82cddde5 100644 --- a/docs/FAQ +++ b/docs/FAQ @@ -50,3 +50,9 @@ - Filesystem metadata indexing will not work with this storage backend as file are inside a MongoDB database and can't be linked (at least for now) + +* Site search is slow + - Add indexes to the following fields: + documents_document - description, recommended size: 160 + documents_documentmetadata - value, recommended size: 80 + documents_documentpage - content, recommended size: 3000 From 3bbcb9f4719546cb7da6c24513a5e040cc22ef1d Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 16 Mar 2011 04:51:51 -0400 Subject: [PATCH 6/9] Fixed list concatenation --- settings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/settings.py b/settings.py index 6342e4bec2..3f0d15e093 100644 --- a/settings.py +++ b/settings.py @@ -293,7 +293,7 @@ if DEVELOPMENT: try: import debug_toolbar - #INSTALLED_APPS.append('debug_toolbar') + #INSTALLED_APPS +=('debug_toolbar',) except ImportError: #print 'debug_toolbar is not installed' pass @@ -302,7 +302,7 @@ if DEVELOPMENT: WSGI_AUTO_RELOAD = True if 'debug_toolbar' in INSTALLED_APPS: - MIDDLEWARE_CLASSES.append('debug_toolbar.middleware.DebugToolbarMiddleware') + MIDDLEWARE_CLASSES += ('debug_toolbar.middleware.DebugToolbarMiddleware',) DEBUG_TOOLBAR_CONFIG={ 'INTERCEPT_REDIRECTS' : False, } From 23635ccf9d3630d552bdfb5b21a103a247153ab6 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 16 Mar 2011 04:52:28 -0400 Subject: [PATCH 7/9] Added models fields indexing flags --- apps/documents/models.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/apps/documents/models.py b/apps/documents/models.py index 
e012c78633..8ccedfb329 100644 --- a/apps/documents/models.py +++ b/apps/documents/models.py @@ -48,12 +48,12 @@ class Document(models.Model): file_mimetype = models.CharField(max_length=64, default='', editable=False) file_mime_encoding = models.CharField(max_length=64, default='', editable=False) #FAT filename can be up to 255 using LFN - file_filename = models.CharField(max_length=255, default='', editable=False) - file_extension = models.CharField(max_length=16, default='', editable=False) - date_added = models.DateTimeField(verbose_name=_(u'added'), auto_now_add=True) + file_filename = models.CharField(max_length=255, default='', editable=False, db_index=True) + file_extension = models.CharField(max_length=16, default='', editable=False, db_index=True) + date_added = models.DateTimeField(verbose_name=_(u'added'), auto_now_add=True, db_index=True) date_updated = models.DateTimeField(verbose_name=_(u'updated'), auto_now=True) checksum = models.TextField(blank=True, null=True, verbose_name=_(u'checksum'), editable=False) - description = models.TextField(blank=True, null=True, verbose_name=_(u'description')) + description = models.TextField(blank=True, null=True, verbose_name=_(u'description'), db_index=True) class Meta: verbose_name = _(u'document') @@ -264,7 +264,7 @@ class MetadataIndex(models.Model): class DocumentMetadata(models.Model): document = models.ForeignKey(Document, verbose_name=_(u'document')) metadata_type = models.ForeignKey(MetadataType, verbose_name=_(u'metadata type')) - value = models.TextField(blank=True, null=True, verbose_name=_(u'metadata value')) + value = models.TextField(blank=True, null=True, verbose_name=_(u'metadata value'), db_index=True) def __unicode__(self): return unicode(self.metadata_type) @@ -276,7 +276,7 @@ class DocumentMetadata(models.Model): class DocumentTypeFilename(models.Model): document_type = models.ForeignKey(DocumentType, verbose_name=_(u'document type')) - filename = models.CharField(max_length=128, 
verbose_name=_(u'filename')) + filename = models.CharField(max_length=128, verbose_name=_(u'filename'), db_index=True) enabled = models.BooleanField(default=True, verbose_name=_(u'enabled')) def __unicode__(self): @@ -290,7 +290,7 @@ class DocumentTypeFilename(models.Model): class DocumentPage(models.Model): document = models.ForeignKey(Document, verbose_name=_(u'document')) - content = models.TextField(blank=True, null=True, verbose_name=_(u'content')) + content = models.TextField(blank=True, null=True, verbose_name=_(u'content'), db_index=True) page_label = models.CharField(max_length=32, blank=True, null=True, verbose_name=_(u'page label')) page_number = models.PositiveIntegerField(default=1, editable=False, verbose_name=_(u'page number')) @@ -366,7 +366,7 @@ available_transformations = ([(name, data['label']) for name, data in AVAILABLE_ class DocumentPageTransformation(models.Model): document_page = models.ForeignKey(DocumentPage, verbose_name=_(u'document page')) - order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_(u'order')) + order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_(u'order'), db_index=True) transformation = models.CharField(choices=available_transformations, max_length=128, verbose_name=_(u'transformation')) arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: {\'degrees\':90}')) From fda62e2b8650f456bab9a993d40fb15395a9b8c8 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 16 Mar 2011 04:57:27 -0400 Subject: [PATCH 8/9] Small optimization in document list view --- apps/documents/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/documents/views.py b/apps/documents/views.py index 4022ca4fe3..8027b8c753 100644 --- a/apps/documents/views.py +++ b/apps/documents/views.py @@ -70,7 +70,7 @@ def document_list(request): return object_list( request, - 
queryset=Document.objects.all(), + queryset=Document.objects.only('file_filename', 'file_filename', 'file_extension').all(), template_name='generic_list.html', extra_context={ 'title':_(u'documents'), From c9d82da28af4a325e7d42c53dc885da9972e0e4f Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 16 Mar 2011 04:57:59 -0400 Subject: [PATCH 9/9] Added indexing flags to ocr model --- apps/ocr/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/ocr/models.py b/apps/ocr/models.py index 1aec291869..c92251970e 100644 --- a/apps/ocr/models.py +++ b/apps/ocr/models.py @@ -44,7 +44,7 @@ class DocumentQueue(models.Model): class QueueDocument(models.Model): document_queue = models.ForeignKey(DocumentQueue, verbose_name=_(u'document queue')) document = models.ForeignKey(Document, verbose_name=_(u'document')) - datetime_submitted = models.DateTimeField(verbose_name=_(u'date time submitted'), auto_now_add=True) + datetime_submitted = models.DateTimeField(verbose_name=_(u'date time submitted'), auto_now_add=True, db_index=True) state = models.CharField(max_length=4, choices=QUEUEDOCUMENT_STATE_CHOICES, default=QUEUEDOCUMENT_STATE_PENDING,