diff --git a/apps/common/utils.py b/apps/common/utils.py
index 21f3a6d777..47b3bbea29 100644
--- a/apps/common/utils.py
+++ b/apps/common/utils.py
@@ -69,14 +69,25 @@ def return_attrib(obj, attrib, arguments={}):
 
 #http://snippets.dzone.com/posts/show/5434
 #http://snippets.dzone.com/user/jakob
-def pretty_size(size):
-    suffixes = [('B',2**10), ('K',2**20), ('M',2**30), ('G',2**40), ('T',2**50)]
-    for suf, lim in suffixes:
-        if size > lim:
-            continue
-        else:
-            return round(size/float(lim/2**10),2).__str__()+suf
+def pretty_size(size, suffixes=(('B',2**10), ('K',2**20), ('M',2**30), ('G',2**40), ('T',2**50))):
+    """
+    Return size (a byte count) as a short human readable string.
+
+    suffixes is an ascending sequence of (suffix, upper limit) pairs; each
+    unit is scaled by the previous pair's limit, so any base works.
+    """
+    divisor = 1
+    for index, (suf, lim) in enumerate(suffixes):
+        # The last suffix also takes sizes beyond its limit instead of
+        # falling off the loop and returning None.
+        if size <= lim or index == len(suffixes) - 1:
+            return str(round(size / float(divisor), 2)) + suf
+        divisor = lim
 
+def pretty_size_10(size):
+    """Same as pretty_size but with decimal (powers of 1000) units."""
+    return pretty_size(size, suffixes=(('B',10**3), ('K',10**6), ('M',10**9), ('G',10**12), ('T',10**15)))
+
 
 def exists_with_famfam(path):
     try:
diff --git a/apps/documents/statistics.py b/apps/documents/statistics.py
new file mode 100644
index 0000000000..110ca67e44
--- /dev/null
+++ b/apps/documents/statistics.py
@@ -0,0 +1,67 @@
+from django.utils.translation import ugettext as _
+from documents.conf.settings import STORAGE_BACKEND
+
+from common.utils import pretty_size, pretty_size_10
+
+from models import Document, DocumentType
+
+
+def get_used_size(path, file_list):
+    """
+    Return the combined size in bytes of the files named in file_list
+    inside the storage directory path; files raising OSError are skipped.
+    """
+    storage = STORAGE_BACKEND()
+    total_size = 0
+    for filename in file_list:
+        try:
+            total_size += storage.size(STORAGE_BACKEND.separator.join([path, filename]))
+        except OSError:
+            pass
+
+    return total_size
+
+
+def storage_count(path=u'.'):
+    """
+    Recursively walk the storage backend starting at path and return a
+    (file count, total size in bytes) tuple.
+    """
+    directories, files = STORAGE_BACKEND().listdir(path)
+    total_count = len(files)
+    total_size = get_used_size(path, files)
+
+    for directory in directories:
+        # Names from listdir() are relative to path (get_used_size joins them
+        # the same way), so rebuild the full path before descending.
+        file_count, files_size = storage_count(STORAGE_BACKEND.separator.join([path, directory]))
+        total_count += file_count
+        total_size += files_size
+
+    return total_count, total_size
+
+
+def get_statistics():
+    """
+    Return the document statistics block (a dict with 'title' and
+    'paragraphs' keys) rendered by the statistics view.
+    """
+    total_db_documents = Document.objects.only('pk',).count()
+
+    paragraphs = [
+        _(u'Document types: %d') % DocumentType.objects.count(),
+        _(u'Documents in database: %d') % total_db_documents
+    ]
+    try:
+        total_storage_documents, storage_used_space = storage_count()
+        paragraphs.append(_(u'Documents in storage: %d') % total_storage_documents)
+        paragraphs.append(_(u'Space used in storage: %s (base 2), %s (base 10), %d bytes') %
+            (pretty_size(storage_used_space), pretty_size_10(storage_used_space), storage_used_space))
+    except NotImplementedError:
+        # Storage figures are left out when the walk is not implemented.
+        pass
+
+    return {
+        'title':_(u'Document statistics'),
+        'paragraphs': paragraphs
+    }
diff --git a/apps/main/__init__.py b/apps/main/__init__.py
index 36cc00ecdf..948a215893 100644
--- a/apps/main/__init__.py
+++ b/apps/main/__init__.py
@@ -15,7 +15,8 @@ check_settings = {'text':_(u'settings'), 'view':'check_settings', 'famfam':'cog'
 main_menu = [
     {'text':_(u'home'), 'view':'home', 'famfam':'house', 'position':0},
     {'text':_(u'tools'), 'view':'tools_menu', 'links': [
-        document_find_all_duplicates, filesystem_serving_recreate_all_links
+        document_find_all_duplicates, filesystem_serving_recreate_all_links,
+        {'text':_(u'statistics'), 'view':'statistics', 'famfam':'table'}
     ],'famfam':'wrench', 'name':'tools','position':7},
 
     {'text':_(u'setup'), 'view':'check_settings', 'links': [
diff --git a/apps/main/templates/statistics.html b/apps/main/templates/statistics.html
new file mode 100644
index 0000000000..325b394ccf
--- /dev/null
+++ b/apps/main/templates/statistics.html
@@ -0,0 +1,12 @@
+{% extends "base.html" %}
+{% block title %} :: {{ title|capfirst }}{% endblock %}
+
+{% block content %}
+    {% for block in blocks %}
+        {% with block.title as title %}
+            {% with block.paragraphs as paragraphs %}
+                {% include "generic_subtemplate.html" %}
+            {% endwith %}
+        {% endwith %}
+    {% endfor %}
+{% endblock %}
diff --git a/apps/main/urls.py b/apps/main/urls.py
index 8d0d3b631a..024868180b 100644
--- a/apps/main/urls.py
+++ b/apps/main/urls.py
@@ -4,5 +4,6 @@ from django.conf.urls.defaults import *
 urlpatterns = patterns('main.views',
     url(r'^$', 'home', (), 'home'),
     url(r'^check_settings/$', 'check_settings', (), 'check_settings'),
-    url(r'^tools_menu/$', 'blank_menu', (), 'tools_menu')
+    url(r'^tools_menu/$', 'blank_menu', (), 'tools_menu'),
+    url(r'^statistics/$', 'statistics', (), 'statistics'),
 )
diff --git a/apps/main/views.py b/apps/main/views.py
index 2a65566127..7ff54079d5 100644
--- a/apps/main/views.py
+++ b/apps/main/views.py
@@ -8,8 +8,10 @@ from common.utils import exists_with_famfam
 
 from common.conf import settings as common_settings
 from documents.conf import settings as documents_settings
+from documents.statistics import get_statistics as documents_statistics
 from converter.conf import settings as converter_settings
 from ocr.conf import settings as ocr_settings
+from ocr.statistics import get_statistics as ocr_statistics
 from filesystem_serving.conf import settings as filesystem_serving_settings
 from dynamic_search.conf import settings as search_settings
 
@@ -121,3 +123,15 @@ def blank_menu(request):
         ],
     },
     context_instance=RequestContext(request))
+
+
+def statistics(request):
+    """Render the statistics page from the blocks supplied by each app."""
+    blocks = []
+    blocks.append(documents_statistics())
+    blocks.append(ocr_statistics())
+
+    return render_to_response('statistics.html', {
+        'blocks':blocks,
+        'title':_(u'Statistics') },
+        context_instance=RequestContext(request))
diff --git a/apps/ocr/statistics.py b/apps/ocr/statistics.py
new file mode 100644
index 0000000000..5391ed987c
--- /dev/null
+++ b/apps/ocr/statistics.py
@@ -0,0 +1,20 @@
+from django.utils.translation import ugettext as _
+
+
+from models import DocumentQueue, QueueDocument
+
+
+def get_statistics():
+    """
+    Return the OCR statistics block (a dict with 'title' and 'paragraphs'
+    keys) rendered by the statistics view.
+    """
+    paragraphs = [
+        _(u'Document queues: %d') % DocumentQueue.objects.count(),
+        _(u'Queued documents: %d') % QueueDocument.objects.only('pk').count()
+    ]
+
+    return {
+        'title':_(u'OCR statistics'),
+        'paragraphs': paragraphs
+    }
diff --git a/apps/storage/backends/filebasedstorage.py b/apps/storage/backends/filebasedstorage.py
index d6fefc810d..2a358f8426 100644
--- a/apps/storage/backends/filebasedstorage.py
+++ b/apps/storage/backends/filebasedstorage.py
@@ -1,8 +1,12 @@
+import os
+
 from django.core.files.storage import FileSystemStorage
 from storage.conf.settings import FILESTORAGE_LOCATION
 
 
 class FileBasedStorage(FileSystemStorage):
+    separator = os.path.sep
+
     def __init__(self, *args, **kwargs):
         super(FileBasedStorage, self).__init__(*args, **kwargs)
         self.location=FILESTORAGE_LOCATION
diff --git a/apps/storage/backends/gridfsstorage.py b/apps/storage/backends/gridfsstorage.py
index a1d0412f39..c7b9106f33 100644
--- a/apps/storage/backends/gridfsstorage.py
+++ b/apps/storage/backends/gridfsstorage.py
@@ -11,6 +11,8 @@ from storage.conf import settings
 
 
 class GridFSStorage(Storage):
+    separator = u'/'
+
     def __init__(self, *args, **kwargs):
         self.db = Connection(host=settings.GRIDFS_HOST,
             port=settings.GRIDFS_PORT)[settings.GRIDFS_DATABASE_NAME]
diff --git a/docs/Changelog.txt b/docs/Changelog.txt
index 906671e54c..83e95d1b1b 100644
--- a/docs/Changelog.txt
+++ b/docs/Changelog.txt
@@ -1,3 +1,6 @@
+2011-Mar-20
+* Added simple statistics page (total used storage, total docs, etc)
+
 2011-Mar-18
 * Implemented form based and button based multi item actions (button based by default)
 * Added multi document delete
diff --git a/docs/TODO b/docs/TODO
index fb332ad7d3..33b3b83f7a 100644
--- a/docs/TODO
+++ b/docs/TODO
@@ -109,7 +109,7 @@ Documents
 * Separate free form document rename and require new permission
 * Test zip file upload with multi directories zip file
 * Don't append an extension separator if extension is non existant
-* Statistics page (total used storage, total docs, per metadata group, per type, per metadata)
+* Statistics page (total used storage, total docs, per metadata group, per type, per metadata) - DONE
 * Improve doc page template/view
 * Document page edit view
 * Show all document's pages content combined
@@ -174,3 +174,4 @@ OCR
 * Support cuneiform
 * Implement StringIO
 * Add storage replication delay setting
+* Advanced statistics (OCR average completion time, error rate)