Finish statistics subsystem refactor.

This commit is contained in:
Roberto Rosario
2015-09-14 21:57:55 -04:00
parent 7687618ea6
commit c1619c93e0
13 changed files with 243 additions and 130 deletions

View File

@@ -230,6 +230,7 @@ Other changes
* setting_view_permission added to the smart settings app. It is no longer required to be a super admin or staff user to see the setting values. * setting_view_permission added to the smart settings app. It is no longer required to be a super admin or staff user to see the setting values.
* Removal of the CombinedSource class. * Removal of the CombinedSource class.
* Reduction of text strings. * Reduction of text strings.
* Statistics subsystem refactor; added the purgestatistics management command.
Upgrading from a previous version Upgrading from a previous version
================================= =================================

View File

@@ -74,7 +74,8 @@ from .permissions import (
from .settings import setting_thumbnail_size from .settings import setting_thumbnail_size
from .statistics import ( from .statistics import (
new_documents_per_month, new_document_pages_per_month, new_documents_per_month, new_document_pages_per_month,
new_document_versions_per_month new_document_versions_per_month, total_document_per_month,
total_document_page_per_month, total_document_version_per_month
) )
from .widgets import document_thumbnail from .widgets import document_thumbnail
@@ -352,24 +353,48 @@ class DocumentsApp(MayanAppConfig):
links=(link_transformation_list,), sources=(DocumentPage,) links=(link_transformation_list,), sources=(DocumentPage,)
) )
namespace = StatisticNamespace(name='documents', label=_('Documents')) namespace = StatisticNamespace(slug='documents', label=_('Documents'))
namespace.add_statistic( namespace.add_statistic(
slug='new-documents-per-month', slug='new-documents-per-month',
label=_('New documents per month'), label=_('New documents per month'),
func=new_documents_per_month, func=new_documents_per_month,
renderer=CharJSLine renderer=CharJSLine,
minute='0'
) )
namespace.add_statistic( namespace.add_statistic(
slug='new-document-versions-per-month', slug='new-document-versions-per-month',
label=_('New document versions per month'), label=_('New document versions per month'),
func=new_document_versions_per_month, func=new_document_versions_per_month,
renderer=CharJSLine renderer=CharJSLine,
minute='0'
) )
namespace.add_statistic( namespace.add_statistic(
slug='new-document-pages-per-month', slug='new-document-pages-per-month',
label=_('New document pages per month'), label=_('New document pages per month'),
func=new_document_pages_per_month, func=new_document_pages_per_month,
renderer=CharJSLine renderer=CharJSLine,
minute='0'
)
namespace.add_statistic(
slug='total-documents-at-each-month',
label=_('Total documents at each month'),
func=total_document_per_month,
renderer=CharJSLine,
minute='0'
)
namespace.add_statistic(
slug='total-document-versions-at-each-month',
label=_('Total document versions at each month'),
func=total_document_version_per_month,
renderer=CharJSLine,
minute='0'
)
namespace.add_statistic(
slug='total-document-pages-at-each-month',
label=_('Total document pages at each month'),
func=total_document_page_per_month,
renderer=CharJSLine,
minute='0'
) )
post_initial_setup.connect( post_initial_setup.connect(

View File

@@ -5,45 +5,11 @@ import datetime
import qsstats import qsstats
from django.db.models import Avg, Count, Max, Min from django.db.models import Avg, Count, Max, Min
from django.template.defaultfilters import filesizeformat
from django.utils import formats
from django.utils.encoding import force_text
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from statistics.classes import Statistic from statistics.classes import Statistic
from .models import Document, DocumentType, DocumentPage, DocumentVersion from .models import Document, DocumentType, DocumentPage, DocumentVersion
from .runtime import storage_backend
def get_used_size(path, file_list):
total_size = 0
for filename in file_list:
try:
total_size += storage_backend.size(
storage_backend.separator.join([path, filename])
)
except OSError:
pass
return total_size
def storage_count(path='.'):
try:
directories, files = storage_backend.listdir(path)
except OSError:
return 0, 0
else:
total_count = len(files)
total_size = get_used_size(path, files)
for directory in directories:
file_count, files_size = storage_count(directory)
total_count += file_count
total_size += files_size
return total_count, total_size
def new_documents_per_month(): def new_documents_per_month():
@@ -54,7 +20,7 @@ def new_documents_per_month():
return { return {
'series': { 'series': {
'Document': map(lambda x: {x[0].month: x[1]}, qss.time_series(start=this_year, end=today, interval='months')) 'Documents': map(lambda x: {x[0].month: x[1]}, qss.time_series(start=this_year, end=today, interval='months'))
} }
} }
@@ -67,7 +33,7 @@ def new_document_versions_per_month():
return { return {
'series': { 'series': {
'Document': map(lambda x: {x[0].month: x[1]}, qss.time_series(start=this_year, end=today, interval='months')) 'Versions': map(lambda x: {x[0].month: x[1]}, qss.time_series(start=this_year, end=today, interval='months'))
} }
} }
@@ -80,45 +46,83 @@ def new_document_pages_per_month():
return { return {
'series': { 'series': {
'Document': map(lambda x: {x[0].month: x[1]}, qss.time_series(start=this_year, end=today, interval='months')) 'Pages': map(lambda x: {x[0].month: x[1]}, qss.time_series(start=this_year, end=today, interval='months'))
}
}
def total_document_per_month():
    """
    Return the running total of documents at the end of each month.

    One data point is produced for every month of the current year, from
    January up to and including the current month. Each point is a
    single-entry dictionary mapping the month number to the value returned
    by ``qss.until()`` for the first day of the following month
    (presumably the count of documents added up to that date; confirm
    against the qsstats documentation).

    Returns a dictionary of the form
    ``{'series': {'Documents': [{1: total}, {2: total}, ...]}}``.
    """
    qss = qsstats.QuerySetStats(Document.objects.all(), 'date_added')

    # Read the clock once so the year and the month always agree.
    today = datetime.date.today()

    result = []

    for month in range(1, today.month + 1):
        if month == 12:
            # December rolls over into January of the following year;
            # month 13 does not exist and would raise ValueError.
            cutoff = datetime.date(today.year + 1, 1, 1)
        else:
            cutoff = datetime.date(today.year, month + 1, 1)

        result.append({month: qss.until(cutoff)})

    return {
        'series': {
            'Documents': result
        }
    }
""" def total_document_version_per_month():
qss = qsstats.QuerySetStats(DocumentVersion.objects.all(), 'document__date_added')
this_year = datetime.date.today().year
class DocumentUsageStatistics(Statistic): result = []
def get_results(self):
results = []
total_db_documents = Document.objects.only('pk',).count() for month in range(1, datetime.date.today().month + 1):
next_month = month + 1
print month
results.extend( if next_month == 12:
[ next_month = 1
_('Documents in database: %d') % total_db_documents, year = this_year + 1
] else:
) next_month = month + 1
year = this_year
try: result.append({month: qss.until(datetime.date(year, next_month, 1))})
total_storage_documents, storage_used_space = storage_count()
results.append(
_('Documents in storage: %d') % total_storage_documents
)
results.append(
_(
'Space used in storage: %s'
) % filesizeformat(storage_used_space)
)
except NotImplementedError:
pass
results.extend( return {
[ 'series': {
_( 'Versions': result
'Document pages in database: %d' }
) % DocumentPage.objects.only('pk',).count(), }
]
)
return results
""" def total_document_page_per_month():
qss = qsstats.QuerySetStats(DocumentPage.objects.all(), 'document_version__document__date_added')
this_year = datetime.date.today().year
result = []
for month in range(1, datetime.date.today().month + 1):
next_month = month + 1
print month
if next_month == 12:
next_month = 1
year = this_year + 1
else:
next_month = month + 1
year = this_year
result.append({month: qss.until(datetime.date(year, next_month, 1))})
return {
'series': {
'Pages': result
}
}

View File

@@ -1,7 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from datetime import timedelta from celery.schedules import crontab
from kombu import Exchange, Queue from kombu import Exchange, Queue
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
@@ -9,13 +8,14 @@ from django.utils.translation import ugettext_lazy as _
from mayan.celery import app from mayan.celery import app
from common import MayanAppConfig, menu_object, menu_secondary, menu_tools from common import MayanAppConfig, menu_object, menu_secondary, menu_tools
from navigation import SourceColumn
from .classes import Statistic, StatisticNamespace from .classes import Statistic, StatisticNamespace
from .links import ( from .links import (
link_execute, link_namespace_details, link_namespace_list, link_execute, link_namespace_details, link_namespace_list,
link_statistics link_statistics, link_view
) )
from .literals import STATISTICS_REFRESH_INTERVAL from .tasks import task_execute_statistic # NOQA - Force registration of task
from .tasks import task_check_statistics # NOQA - Force registration of task
class StatisticsApp(MayanAppConfig): class StatisticsApp(MayanAppConfig):
@@ -25,13 +25,10 @@ class StatisticsApp(MayanAppConfig):
def ready(self): def ready(self):
super(StatisticsApp, self).ready() super(StatisticsApp, self).ready()
app.conf.CELERYBEAT_SCHEDULE.update( SourceColumn(
{ source=Statistic,
'statistics.task_check_statistics': { label=_('Schedule'),
'task': 'statistics.tasks.task_check_statistics', attribute='schedule',
'schedule': timedelta(seconds=STATISTICS_REFRESH_INTERVAL),
},
}
) )
app.conf.CELERY_QUEUES.extend( app.conf.CELERY_QUEUES.extend(
@@ -43,15 +40,7 @@ class StatisticsApp(MayanAppConfig):
) )
) )
app.conf.CELERY_ROUTES.update( menu_object.bind_links(links=(link_execute, link_view), sources=(Statistic,))
{
'statistics.tasks.task_check_statistics': {
'queue': 'statistics'
},
}
)
menu_object.bind_links(links=(link_execute,), sources=(Statistic,))
menu_object.bind_links( menu_object.bind_links(
links=(link_namespace_details,), sources=(StatisticNamespace,) links=(link_namespace_details,), sources=(StatisticNamespace,)
) )

View File

@@ -2,6 +2,11 @@ from __future__ import unicode_literals
import json import json
from celery.schedules import crontab
from djcelery.models import PeriodicTask
from mayan.celery import app
from .models import StatisticResult from .models import StatisticResult
@@ -13,14 +18,14 @@ class StatisticNamespace(object):
return cls._registry.values() return cls._registry.values()
@classmethod @classmethod
def get(cls, name): def get(cls, slug):
return cls._registry[name] return cls._registry[slug]
def __init__(self, name, label): def __init__(self, slug, label):
self.name = name self.slug = slug
self.label = label self.label = label
self._statistics = [] self._statistics = []
self.__class__._registry[name] = self self.__class__._registry[slug] = self
def __unicode__(self): def __unicode__(self):
return unicode(self.label) return unicode(self.label)
@@ -30,10 +35,6 @@ class StatisticNamespace(object):
statistic.namespace = self statistic.namespace = self
self._statistics.append(statistic) self._statistics.append(statistic)
@property
def id(self):
return self.name
@property @property
def statistics(self): def statistics(self):
return self._statistics return self._statistics
@@ -42,6 +43,22 @@ class StatisticNamespace(object):
class Statistic(object): class Statistic(object):
_registry = {} _registry = {}
@staticmethod
def purge_schedules():
    """
    Remove periodic tasks and stored results of unregistered statistics.

    A periodic task is considered obsolete when its name starts with
    ``statistics.`` but is not one of the task names of the currently
    registered statistics. For every obsolete task the task itself is
    deleted, its crontab entry is deleted when no other periodic task
    uses it, and the stored result of the matching statistic is removed.
    """
    # Must match the name format produced by get_task_name().
    task_name_prefix = 'statistics.task_execute_statistic_'

    # Evaluate the queryset up front: once the rows are deleted below,
    # querying again would return nothing.
    obsolete_tasks = list(
        PeriodicTask.objects.filter(
            name__startswith='statistics.'
        ).exclude(name__in=Statistic.get_task_names())
    )

    # Results are stored under the statistic slug, not the task name, so
    # strip the task name prefix to recover the slug.
    obsolete_slugs = [
        periodic_task.name[len(task_name_prefix):]
        for periodic_task in obsolete_tasks
        if periodic_task.name.startswith(task_name_prefix)
    ]

    for periodic_task in obsolete_tasks:
        crontab_instance = periodic_task.crontab
        periodic_task.delete()

        if crontab_instance and not crontab_instance.periodictask_set.exists():
            # Only delete the crontab if nobody else is using it
            crontab_instance.delete()

    StatisticResult.objects.filter(slug__in=obsolete_slugs).delete()
@classmethod @classmethod
def get_all(cls): def get_all(cls):
return cls._registry.values() return cls._registry.values()
@@ -50,11 +67,39 @@ class Statistic(object):
def get(cls, slug): def get(cls, slug):
return cls._registry[slug] return cls._registry[slug]
def __init__(self, slug, label, func, renderer): @classmethod
def get_task_names(cls):
return [task.get_task_name() for task in cls.get_all()]
def __init__(self, slug, label, func, renderer, minute='*', hour='*', day_of_week='*', day_of_month='*', month_of_year='*'):
self.slug = slug self.slug = slug
self.label = label self.label = label
self.func = func self.func = func
self.renderer = renderer self.renderer = renderer
self.schedule = crontab(
minute=minute, hour=hour, day_of_week=day_of_week,
day_of_month=day_of_month, month_of_year=month_of_year,
)
app.conf.CELERYBEAT_SCHEDULE.update(
{
self.get_task_name(): {
'task': 'statistics.tasks.task_execute_statistic',
'schedule': self.schedule,
'args': (self.slug,)
},
}
)
app.conf.CELERY_ROUTES.update(
{
self.get_task_name(): {
'queue': 'statistics'
},
}
)
self.__class__._registry[slug] = self self.__class__._registry[slug] = self
def __unicode__(self): def __unicode__(self):
@@ -63,14 +108,13 @@ class Statistic(object):
def execute(self): def execute(self):
self.store_results(results=self.func()) self.store_results(results=self.func())
@property def get_task_name(self):
def id(self): return 'statistics.task_execute_statistic_{}'.format(self.slug)
return self.slug
def store_results(self, results): def store_results(self, results):
StatisticResult.objects.filter(slug=self.slug).delete() StatisticResult.objects.filter(slug=self.slug).delete()
statistic_result = StatisticResult.objects.create(slug=self.slug) statistic_result, created = StatisticResult.objects.get_or_create(slug=self.slug)
statistic_result.store_data(data=results) statistic_result.store_data(data=results)
def get_results(self): def get_results(self):
@@ -95,6 +139,14 @@ class CharJSLine(ChartRenderer):
template_name = 'statistics/backends/chartjs/line.html' template_name = 'statistics/backends/chartjs/line.html'
dataset_palette = ( dataset_palette = (
{
'fillColor': "rgba(220,220,220,0.2)",
'strokeColor': "rgba(220,220,220,1)",
'pointColor': "rgba(220,220,220,1)",
'pointStrokeColor': "#fff",
'pointHighlightFill': "#fff",
'pointHighlightStroke': "rgba(220,220,220,1)",
},
{ {
'fillColor': "rgba(151,187,205,0.2)", 'fillColor': "rgba(151,187,205,0.2)",
'strokeColor': "rgba(151,187,205,1)", 'strokeColor': "rgba(151,187,205,1)",
@@ -103,14 +155,6 @@ class CharJSLine(ChartRenderer):
'pointHighlightFill': "#fff", 'pointHighlightFill': "#fff",
'pointHighlightStroke': "rgba(151,187,205,1)", 'pointHighlightStroke': "rgba(151,187,205,1)",
}, },
{
'fillColor': "rgba(220,220,220,0.2)",
'strokeColor': "rgba(220,220,220,1)",
'pointColor': "rgba(220,220,220,1)",
'pointStrokeColor': "#fff",
'pointHighlightFill': "#fff",
'pointHighlightStroke': "rgba(220,220,220,1)",
}
) )
def get_chart_data(self): def get_chart_data(self):

View File

@@ -8,12 +8,16 @@ from .permissions import permission_statistics_view
link_execute = Link( link_execute = Link(
permissions=(permission_statistics_view,), text=_('Queue'),
view='statistics:statistic_queue', args='resolved_object.slug'
)
link_view = Link(
permissions=(permission_statistics_view,), text=_('View'), permissions=(permission_statistics_view,), text=_('View'),
view='statistics:statistic_detail', args='resolved_object.id' view='statistics:statistic_detail', args='resolved_object.slug'
) )
link_namespace_details = Link( link_namespace_details = Link(
permissions=(permission_statistics_view,), text=_('Namespace details'), permissions=(permission_statistics_view,), text=_('Namespace details'),
view='statistics:namespace_details', args='resolved_object.id' view='statistics:namespace_details', args='resolved_object.slug'
) )
link_namespace_list = Link( link_namespace_list = Link(
permissions=(permission_statistics_view,), text=_('Namespace list'), permissions=(permission_statistics_view,), text=_('Namespace list'),

View File

@@ -1,3 +0,0 @@
from __future__ import unicode_literals
STATISTICS_REFRESH_INTERVAL = 60 * 60 * 24 # Every 24 hours

View File

@@ -0,0 +1,12 @@
from __future__ import unicode_literals
from django.core.management.base import BaseCommand
from ...classes import Statistic
class Command(BaseCommand):
    """Management command that delegates to ``Statistic.purge_schedules``."""

    help = (
        'Remove obsolete statistics scheduled and results from the database'
    )

    def handle(self, *args, **options):
        """Run the purge; positional arguments and options are not used."""
        Statistic.purge_schedules()

View File

@@ -4,17 +4,15 @@ import logging
from mayan.celery import app from mayan.celery import app
from .classes import StatisticNamespace from .classes import Statistic
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@app.task(ignore_result=True) @app.task(ignore_result=True)
def task_check_statistics(): def task_execute_statistic(slug):
logger.info('Executing') logger.info('Executing')
for namespace in StatisticNamespace.get_all(): Statistic.get(slug=slug).execute()
for statistic in namespace.statistics:
statistic.execute()
logger.info('Finshed') logger.info('Finshed')

View File

@@ -2,17 +2,24 @@ from __future__ import unicode_literals
from django.conf.urls import patterns, url from django.conf.urls import patterns, url
from .views import NamespaceDetailView, NamespaceListView, StatisticDetailView from .views import (
NamespaceDetailView, NamespaceListView, StatisticDetailView,
StatisticQueueView
)
urlpatterns = patterns( urlpatterns = patterns(
'statistics.views', 'statistics.views',
url(r'^$', NamespaceListView.as_view(), name='namespace_list'), url(r'^$', NamespaceListView.as_view(), name='namespace_list'),
url( url(
r'^namespace/(?P<namespace_id>\w+)/details/$', r'^namespace/(?P<slug>[\w-]+)/details/$',
NamespaceDetailView.as_view(), name='namespace_details' NamespaceDetailView.as_view(), name='namespace_details'
), ),
url( url(
r'^(?P<slug>[\w-]+)/view/$', StatisticDetailView.as_view(), r'^(?P<slug>[\w-]+)/view/$', StatisticDetailView.as_view(),
name='statistic_detail' name='statistic_detail'
), ),
url(
r'^(?P<slug>[\w-]+)/queue/$', StatisticQueueView.as_view(),
name='statistic_queue'
),
) )

View File

@@ -2,13 +2,18 @@ from __future__ import unicode_literals
import json import json
from django.core.urlresolvers import reverse
from django.http import Http404 from django.http import Http404
from django.shortcuts import HttpResponseRedirect
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from common.views import SingleObjectDetailView, SingleObjectListView from common.generics import (
ConfirmView, SingleObjectDetailView, SingleObjectListView
)
from .classes import Statistic, StatisticNamespace from .classes import Statistic, StatisticNamespace
from .permissions import permission_statistics_view from .permissions import permission_statistics_view
from .tasks import task_execute_statistic
class NamespaceListView(SingleObjectListView): class NamespaceListView(SingleObjectListView):
@@ -34,7 +39,7 @@ class NamespaceDetailView(SingleObjectListView):
} }
def get_namespace(self): def get_namespace(self):
return StatisticNamespace.get(self.kwargs['namespace_id']) return StatisticNamespace.get(self.kwargs['slug'])
def get_queryset(self): def get_queryset(self):
return self.get_namespace().statistics return self.get_namespace().statistics
@@ -61,3 +66,30 @@ class StatisticDetailView(SingleObjectDetailView):
def get_template_names(self): def get_template_names(self):
return (self.get_object().renderer.template_name,) return (self.get_object().renderer.template_name,)
class StatisticQueueView(ConfirmView):
    """
    Confirmation view that queues one statistic to be recalculated.

    The statistic is selected by the ``slug`` URL keyword argument. On POST
    the ``task_execute_statistic`` task is queued for it and the user is
    redirected to the details view of the statistic's namespace.
    """

    view_permission = permission_statistics_view

    def get_object(self):
        """Return the statistic registered under the URL slug, or 404."""
        try:
            statistic = Statistic.get(self.kwargs['slug'])
        except KeyError:
            raise Http404(_('Statistic "%s" not found.') % self.kwargs['slug'])
        else:
            return statistic

    def get_extra_context(self):
        """Return the namespace, object and title entries for the template."""
        statistic = self.get_object()
        title = _('Queue statistic "%s" to be updated?') % statistic

        return {
            'namespace': statistic.namespace,
            'object': statistic,
            'title': title,
        }

    def get_post_action_redirect(self):
        """Return the URL of the details view of the owning namespace."""
        namespace = self.get_object().namespace
        return reverse('statistics:namespace_details', args=(namespace.slug,))

    def post(self, request, *args, **kwargs):
        """Queue the statistic's task, then redirect."""
        statistic = self.get_object()
        task_execute_statistic.delay(slug=statistic.slug)

        redirect_url = self.get_post_action_redirect()
        return HttpResponseRedirect(redirect_url)