Finish statistics subsystem refactor.

This commit is contained in:
Roberto Rosario
2015-09-14 21:57:55 -04:00
parent 7687618ea6
commit c1619c93e0
13 changed files with 243 additions and 130 deletions

View File

@@ -230,6 +230,7 @@ Other changes
* setting_view_permission added to the smart settings app. It is no longer required to be a super admin or staff user to see the setting values.
* Removal of the CombinedSource class.
* Reduction of text strings.
* Statistics refactor; purgestatistics command
Upgrading from a previous version
=================================

View File

@@ -74,7 +74,8 @@ from .permissions import (
from .settings import setting_thumbnail_size
from .statistics import (
new_documents_per_month, new_document_pages_per_month,
new_document_versions_per_month
new_document_versions_per_month, total_document_per_month,
total_document_page_per_month, total_document_version_per_month
)
from .widgets import document_thumbnail
@@ -352,24 +353,48 @@ class DocumentsApp(MayanAppConfig):
links=(link_transformation_list,), sources=(DocumentPage,)
)
namespace = StatisticNamespace(name='documents', label=_('Documents'))
namespace = StatisticNamespace(slug='documents', label=_('Documents'))
namespace.add_statistic(
slug='new-documents-per-month',
label=_('New documents per month'),
func=new_documents_per_month,
renderer=CharJSLine
renderer=CharJSLine,
minute='0'
)
namespace.add_statistic(
slug='new-document-versions-per-month',
label=_('New document versions per month'),
func=new_document_versions_per_month,
renderer=CharJSLine
renderer=CharJSLine,
minute='0'
)
namespace.add_statistic(
slug='new-document-pages-per-month',
label=_('New document pages per month'),
func=new_document_pages_per_month,
renderer=CharJSLine
renderer=CharJSLine,
minute='0'
)
namespace.add_statistic(
slug='total-documents-at-each-month',
label=_('Total documents at each month'),
func=total_document_per_month,
renderer=CharJSLine,
minute='0'
)
namespace.add_statistic(
slug='total-document-versions-at-each-month',
label=_('Total document versions at each month'),
func=total_document_version_per_month,
renderer=CharJSLine,
minute='0'
)
namespace.add_statistic(
slug='total-document-pages-at-each-month',
label=_('Total document pages at each month'),
func=total_document_page_per_month,
renderer=CharJSLine,
minute='0'
)
post_initial_setup.connect(

View File

@@ -5,45 +5,11 @@ import datetime
import qsstats
from django.db.models import Avg, Count, Max, Min
from django.template.defaultfilters import filesizeformat
from django.utils import formats
from django.utils.encoding import force_text
from django.utils.translation import ugettext_lazy as _
from statistics.classes import Statistic
from .models import Document, DocumentType, DocumentPage, DocumentVersion
from .runtime import storage_backend
def get_used_size(path, file_list):
total_size = 0
for filename in file_list:
try:
total_size += storage_backend.size(
storage_backend.separator.join([path, filename])
)
except OSError:
pass
return total_size
def storage_count(path='.'):
try:
directories, files = storage_backend.listdir(path)
except OSError:
return 0, 0
else:
total_count = len(files)
total_size = get_used_size(path, files)
for directory in directories:
file_count, files_size = storage_count(directory)
total_count += file_count
total_size += files_size
return total_count, total_size
def new_documents_per_month():
@@ -54,7 +20,7 @@ def new_documents_per_month():
return {
'series': {
'Document': map(lambda x: {x[0].month: x[1]}, qss.time_series(start=this_year, end=today, interval='months'))
'Documents': map(lambda x: {x[0].month: x[1]}, qss.time_series(start=this_year, end=today, interval='months'))
}
}
@@ -67,7 +33,7 @@ def new_document_versions_per_month():
return {
'series': {
'Document': map(lambda x: {x[0].month: x[1]}, qss.time_series(start=this_year, end=today, interval='months'))
'Versions': map(lambda x: {x[0].month: x[1]}, qss.time_series(start=this_year, end=today, interval='months'))
}
}
@@ -80,45 +46,83 @@ def new_document_pages_per_month():
return {
'series': {
'Document': map(lambda x: {x[0].month: x[1]}, qss.time_series(start=this_year, end=today, interval='months'))
'Pages': map(lambda x: {x[0].month: x[1]}, qss.time_series(start=this_year, end=today, interval='months'))
}
}
def total_document_per_month():
    """
    Build the running total of documents for each month of the current
    year, from January up to and including the current month.

    Returns a dictionary shaped as
    ``{'series': {'Documents': [{month_number: value}, ...]}}`` where each
    value is whatever ``qss.until()`` reports for a cutoff set to the first
    day of the following month.
    """
    qss = qsstats.QuerySetStats(Document.objects.all(), 'date_added')
    # Read the clock once so the year and the month always belong to the
    # same day, even if the call straddles midnight on December 31st.
    today = datetime.date.today()

    result = []

    for month in range(1, today.month + 1):
        if month == 12:
            # December has no "month 13": its cutoff is January 1st of the
            # next year.
            cutoff = datetime.date(today.year + 1, 1, 1)
        else:
            cutoff = datetime.date(today.year, month + 1, 1)

        result.append({month: qss.until(cutoff)})

    return {
        'series': {
            'Documents': result
        }
    }
"""
def total_document_version_per_month():
qss = qsstats.QuerySetStats(DocumentVersion.objects.all(), 'document__date_added')
this_year = datetime.date.today().year
class DocumentUsageStatistics(Statistic):
def get_results(self):
results = []
result = []
total_db_documents = Document.objects.only('pk',).count()
for month in range(1, datetime.date.today().month + 1):
next_month = month + 1
print month
results.extend(
[
_('Documents in database: %d') % total_db_documents,
]
)
if next_month == 12:
next_month = 1
year = this_year + 1
else:
next_month = month + 1
year = this_year
try:
total_storage_documents, storage_used_space = storage_count()
results.append(
_('Documents in storage: %d') % total_storage_documents
)
results.append(
_(
'Space used in storage: %s'
) % filesizeformat(storage_used_space)
)
except NotImplementedError:
pass
result.append({month: qss.until(datetime.date(year, next_month, 1))})
results.extend(
[
_(
'Document pages in database: %d'
) % DocumentPage.objects.only('pk',).count(),
]
)
return {
'series': {
'Versions': result
}
}
return results
"""
def total_document_page_per_month():
    """
    Build the running total of document pages for each month of the current
    year, from January up to and including the current month.

    Pages are dated by the ``date_added`` of the document that owns their
    version. Returns a dictionary shaped as
    ``{'series': {'Pages': [{month_number: value}, ...]}}`` where each value
    is whatever ``qss.until()`` reports for a cutoff set to the first day of
    the following month.
    """
    qss = qsstats.QuerySetStats(
        DocumentPage.objects.all(), 'document_version__document__date_added'
    )
    # Read the clock once so the year and the month always belong to the
    # same day, even if the call straddles midnight on December 31st.
    today = datetime.date.today()

    result = []

    for month in range(1, today.month + 1):
        if month == 12:
            # December has no "month 13": its cutoff is January 1st of the
            # next year.
            cutoff = datetime.date(today.year + 1, 1, 1)
        else:
            cutoff = datetime.date(today.year, month + 1, 1)

        result.append({month: qss.until(cutoff)})

    return {
        'series': {
            'Pages': result
        }
    }

View File

@@ -1,7 +1,6 @@
from __future__ import unicode_literals
from datetime import timedelta
from celery.schedules import crontab
from kombu import Exchange, Queue
from django.utils.translation import ugettext_lazy as _
@@ -9,13 +8,14 @@ from django.utils.translation import ugettext_lazy as _
from mayan.celery import app
from common import MayanAppConfig, menu_object, menu_secondary, menu_tools
from navigation import SourceColumn
from .classes import Statistic, StatisticNamespace
from .links import (
link_execute, link_namespace_details, link_namespace_list,
link_statistics
link_statistics, link_view
)
from .literals import STATISTICS_REFRESH_INTERVAL
from .tasks import task_check_statistics # NOQA - Force registration of task
from .tasks import task_execute_statistic # NOQA - Force registration of task
class StatisticsApp(MayanAppConfig):
@@ -25,13 +25,10 @@ class StatisticsApp(MayanAppConfig):
def ready(self):
super(StatisticsApp, self).ready()
app.conf.CELERYBEAT_SCHEDULE.update(
{
'statistics.task_check_statistics': {
'task': 'statistics.tasks.task_check_statistics',
'schedule': timedelta(seconds=STATISTICS_REFRESH_INTERVAL),
},
}
SourceColumn(
source=Statistic,
label=_('Schedule'),
attribute='schedule',
)
app.conf.CELERY_QUEUES.extend(
@@ -43,15 +40,7 @@ class StatisticsApp(MayanAppConfig):
)
)
app.conf.CELERY_ROUTES.update(
{
'statistics.tasks.task_check_statistics': {
'queue': 'statistics'
},
}
)
menu_object.bind_links(links=(link_execute,), sources=(Statistic,))
menu_object.bind_links(links=(link_execute, link_view), sources=(Statistic,))
menu_object.bind_links(
links=(link_namespace_details,), sources=(StatisticNamespace,)
)

View File

@@ -2,6 +2,11 @@ from __future__ import unicode_literals
import json
from celery.schedules import crontab
from djcelery.models import PeriodicTask
from mayan.celery import app
from .models import StatisticResult
@@ -13,14 +18,14 @@ class StatisticNamespace(object):
return cls._registry.values()
@classmethod
def get(cls, name):
return cls._registry[name]
def get(cls, slug):
return cls._registry[slug]
def __init__(self, name, label):
self.name = name
def __init__(self, slug, label):
self.slug = slug
self.label = label
self._statistics = []
self.__class__._registry[name] = self
self.__class__._registry[slug] = self
def __unicode__(self):
return unicode(self.label)
@@ -30,10 +35,6 @@ class StatisticNamespace(object):
statistic.namespace = self
self._statistics.append(statistic)
@property
def id(self):
return self.name
@property
def statistics(self):
return self._statistics
@@ -42,6 +43,22 @@ class StatisticNamespace(object):
class Statistic(object):
_registry = {}
@staticmethod
def purge_schedules():
    """
    Delete the periodic tasks, and the stored results, that belong to
    statistics which are no longer registered.

    A periodic task is considered obsolete when its name starts with
    ``statistics.`` and is not among the names reported by
    ``Statistic.get_task_names()``.
    """
    # Must stay in sync with the name format built by get_task_name().
    task_name_prefix = 'statistics.task_execute_statistic_'

    queryset = PeriodicTask.objects.filter(
        name__startswith='statistics.'
    ).exclude(name__in=Statistic.get_task_names())

    # Snapshot the names before deleting anything: querying for them after
    # the rows are gone would return nothing.
    obsolete_names = list(queryset.values_list('name', flat=True))

    for periodic_task in queryset:
        crontab_instance = periodic_task.crontab
        periodic_task.delete()

        if crontab_instance and not crontab_instance.periodictask_set.exists():
            # Only delete the crontab if nobody else is using it
            crontab_instance.delete()

    # Results are stored by statistic slug, not by task name, so strip the
    # task name prefix to recover the slug.
    obsolete_slugs = [
        name[len(task_name_prefix):] for name in obsolete_names
        if name.startswith(task_name_prefix)
    ]

    StatisticResult.objects.filter(slug__in=obsolete_slugs).delete()
@classmethod
def get_all(cls):
return cls._registry.values()
@@ -50,11 +67,39 @@ class Statistic(object):
def get(cls, slug):
return cls._registry[slug]
def __init__(self, slug, label, func, renderer):
@classmethod
def get_task_names(cls):
    """
    Return a list with the task name of every registered statistic, in
    the order yielded by ``get_all()``.
    """
    names = []

    for statistic in cls.get_all():
        names.append(statistic.get_task_name())

    return names
def __init__(
    self, slug, label, func, renderer, minute='*', hour='*',
    day_of_week='*', day_of_month='*', month_of_year='*'
):
    """
    Register a statistic and schedule its periodic recalculation.

    slug: unique identifier; used as the registry key and as the argument
        passed to the scheduled task.
    label: human readable name.
    func: callable that produces the statistic's results.
    renderer: class used to display the results.
    minute, hour, day_of_week, day_of_month, month_of_year: crontab
        fields forwarded unchanged to ``celery.schedules.crontab``.
    """
    task_dotted_path = 'statistics.tasks.task_execute_statistic'

    self.slug = slug
    self.label = label
    self.func = func
    self.renderer = renderer

    self.schedule = crontab(
        minute=minute, hour=hour, day_of_week=day_of_week,
        day_of_month=day_of_month, month_of_year=month_of_year,
    )

    # One beat entry per statistic, all pointing at the same task and
    # told apart by the slug argument.
    app.conf.CELERYBEAT_SCHEDULE.update(
        {
            self.get_task_name(): {
                'task': task_dotted_path,
                'schedule': self.schedule,
                'args': (self.slug,)
            },
        }
    )

    app.conf.CELERY_ROUTES.update(
        {
            # Kept for backward compatibility with the existing settings
            self.get_task_name(): {
                'queue': 'statistics'
            },
            # Routes are matched against the task name, not the beat
            # entry name, so this is the entry that sends the task to
            # the statistics queue.
            task_dotted_path: {
                'queue': 'statistics'
            },
        }
    )

    self.__class__._registry[slug] = self
def __unicode__(self):
@@ -63,14 +108,13 @@ class Statistic(object):
def execute(self):
self.store_results(results=self.func())
@property
def id(self):
return self.slug
def get_task_name(self):
    """
    Return the name that identifies this statistic's periodic task, built
    from a fixed prefix followed by the statistic's slug.
    """
    name_template = 'statistics.task_execute_statistic_{}'
    return name_template.format(self.slug)
def store_results(self, results):
StatisticResult.objects.filter(slug=self.slug).delete()
statistic_result = StatisticResult.objects.create(slug=self.slug)
statistic_result, created = StatisticResult.objects.get_or_create(slug=self.slug)
statistic_result.store_data(data=results)
def get_results(self):
@@ -95,6 +139,14 @@ class CharJSLine(ChartRenderer):
template_name = 'statistics/backends/chartjs/line.html'
dataset_palette = (
{
'fillColor': "rgba(220,220,220,0.2)",
'strokeColor': "rgba(220,220,220,1)",
'pointColor': "rgba(220,220,220,1)",
'pointStrokeColor': "#fff",
'pointHighlightFill': "#fff",
'pointHighlightStroke': "rgba(220,220,220,1)",
},
{
'fillColor': "rgba(151,187,205,0.2)",
'strokeColor': "rgba(151,187,205,1)",
@@ -103,14 +155,6 @@ class CharJSLine(ChartRenderer):
'pointHighlightFill': "#fff",
'pointHighlightStroke': "rgba(151,187,205,1)",
},
{
'fillColor': "rgba(220,220,220,0.2)",
'strokeColor': "rgba(220,220,220,1)",
'pointColor': "rgba(220,220,220,1)",
'pointStrokeColor': "#fff",
'pointHighlightFill': "#fff",
'pointHighlightStroke': "rgba(220,220,220,1)",
}
)
def get_chart_data(self):

View File

@@ -8,12 +8,16 @@ from .permissions import permission_statistics_view
link_execute = Link(
permissions=(permission_statistics_view,), text=_('Queue'),
view='statistics:statistic_queue', args='resolved_object.slug'
)
link_view = Link(
permissions=(permission_statistics_view,), text=_('View'),
view='statistics:statistic_detail', args='resolved_object.id'
view='statistics:statistic_detail', args='resolved_object.slug'
)
link_namespace_details = Link(
permissions=(permission_statistics_view,), text=_('Namespace details'),
view='statistics:namespace_details', args='resolved_object.id'
view='statistics:namespace_details', args='resolved_object.slug'
)
link_namespace_list = Link(
permissions=(permission_statistics_view,), text=_('Namespace list'),

View File

@@ -1,3 +0,0 @@
from __future__ import unicode_literals
STATISTICS_REFRESH_INTERVAL = 60 * 60 * 24 # Every 24 hours (value is in seconds)

View File

@@ -0,0 +1,12 @@
from __future__ import unicode_literals
from django.core.management.base import BaseCommand
from ...classes import Statistic
class Command(BaseCommand):
    """
    Management command that purges the schedules and results of statistics
    that are no longer registered.
    """
    help = 'Remove obsolete statistics schedules and results from the database'

    def handle(self, *args, **options):
        """
        Run the purge; all of the work is delegated to
        ``Statistic.purge_schedules()``. Positional arguments and options
        are accepted but not used.
        """
        Statistic.purge_schedules()

View File

@@ -4,17 +4,15 @@ import logging
from mayan.celery import app
from .classes import StatisticNamespace
from .classes import Statistic
logger = logging.getLogger(__name__)
@app.task(ignore_result=True)
def task_check_statistics():
def task_execute_statistic(slug):
logger.info('Executing')
for namespace in StatisticNamespace.get_all():
for statistic in namespace.statistics:
statistic.execute()
Statistic.get(slug=slug).execute()
logger.info('Finshed')

View File

@@ -2,17 +2,24 @@ from __future__ import unicode_literals
from django.conf.urls import patterns, url
from .views import NamespaceDetailView, NamespaceListView, StatisticDetailView
from .views import (
NamespaceDetailView, NamespaceListView, StatisticDetailView,
StatisticQueueView
)
urlpatterns = patterns(
'statistics.views',
url(r'^$', NamespaceListView.as_view(), name='namespace_list'),
url(
r'^namespace/(?P<namespace_id>\w+)/details/$',
r'^namespace/(?P<slug>[\w-]+)/details/$',
NamespaceDetailView.as_view(), name='namespace_details'
),
url(
r'^(?P<slug>[\w-]+)/view/$', StatisticDetailView.as_view(),
name='statistic_detail'
),
url(
r'^(?P<slug>[\w-]+)/queue/$', StatisticQueueView.as_view(),
name='statistic_queue'
),
)

View File

@@ -2,13 +2,18 @@ from __future__ import unicode_literals
import json
from django.core.urlresolvers import reverse
from django.http import Http404
from django.shortcuts import HttpResponseRedirect
from django.utils.translation import ugettext_lazy as _
from common.views import SingleObjectDetailView, SingleObjectListView
from common.generics import (
ConfirmView, SingleObjectDetailView, SingleObjectListView
)
from .classes import Statistic, StatisticNamespace
from .permissions import permission_statistics_view
from .tasks import task_execute_statistic
class NamespaceListView(SingleObjectListView):
@@ -34,7 +39,7 @@ class NamespaceDetailView(SingleObjectListView):
}
def get_namespace(self):
return StatisticNamespace.get(self.kwargs['namespace_id'])
return StatisticNamespace.get(self.kwargs['slug'])
def get_queryset(self):
return self.get_namespace().statistics
@@ -61,3 +66,30 @@ class StatisticDetailView(SingleObjectDetailView):
def get_template_names(self):
return (self.get_object().renderer.template_name,)
class StatisticQueueView(ConfirmView):
    """
    Confirmation view that, when posted, queues a single statistic to be
    recalculated in the background and then returns to the details page of
    the statistic's namespace.
    """
    # NOTE(review): presumably enforced by ConfirmView before dispatch -
    # confirm against common.generics.
    view_permission = permission_statistics_view

    def get_extra_context(self):
        """Return the template context for the confirmation page."""
        statistic = self.get_object()

        return {
            'namespace': statistic.namespace,
            'object': statistic,
            'title': _('Queue statistic "%s" to be updated?') % statistic,
        }

    def get_object(self):
        """
        Return the statistic registered under the slug captured from the
        URL. Raises Http404 when no statistic is registered with that slug.
        """
        try:
            statistic = Statistic.get(self.kwargs['slug'])
        except KeyError:
            raise Http404(_('Statistic "%s" not found.') % self.kwargs['slug'])
        else:
            return statistic

    def get_post_action_redirect(self):
        """Return the URL of the details view of the statistic's namespace."""
        namespace_slug = self.get_object().namespace.slug

        return reverse('statistics:namespace_details', args=(namespace_slug,))

    def post(self, request, *args, **kwargs):
        """Queue the recalculation task for the statistic and redirect."""
        statistic = self.get_object()
        task_execute_statistic.delay(slug=statistic.slug)

        return HttpResponseRedirect(self.get_post_action_redirect())