From 9c6f10cc1c65713ace1e1790e276b2525ec8457c Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Sun, 29 Jul 2012 05:31:45 -0400 Subject: [PATCH 01/40] Add queue manager app --- apps/queue_manager/__init__.py | 20 ++++ apps/queue_manager/admin.py | 22 +++++ apps/queue_manager/exceptions.py | 6 ++ apps/queue_manager/migrations/0001_initial.py | 57 +++++++++++ apps/queue_manager/migrations/__init__.py | 0 apps/queue_manager/models.py | 95 +++++++++++++++++++ apps/queue_manager/views.py | 1 + 7 files changed, 201 insertions(+) create mode 100755 apps/queue_manager/__init__.py create mode 100755 apps/queue_manager/admin.py create mode 100644 apps/queue_manager/exceptions.py create mode 100644 apps/queue_manager/migrations/0001_initial.py create mode 100644 apps/queue_manager/migrations/__init__.py create mode 100755 apps/queue_manager/models.py create mode 100755 apps/queue_manager/views.py diff --git a/apps/queue_manager/__init__.py b/apps/queue_manager/__init__.py new file mode 100755 index 0000000000..5b40c4e8ec --- /dev/null +++ b/apps/queue_manager/__init__.py @@ -0,0 +1,20 @@ +from queue_manager.models import Queue as QueueModel, QueuePushError + +class Queue(object): + @classmethod + def __new__(cls, name, queue_name, label=None, unique_names=False): + name = queue_name + if not label: + label=u'' + queue, created = QueueModel.objects.get_or_create( + name=name, + defaults={ + 'label': label, + 'unique_names': unique_names + } + ) + if not created: + queue.label = label + queue.unique_names = unique_names + queue.save() + return queue diff --git a/apps/queue_manager/admin.py b/apps/queue_manager/admin.py new file mode 100755 index 0000000000..14bb1dcb74 --- /dev/null +++ b/apps/queue_manager/admin.py @@ -0,0 +1,22 @@ +from django.contrib import admin + +from django.utils.translation import ugettext_lazy as _ + +from queue_manager.models import Queue, QueueItem + + +class QueueItemInline(admin.StackedInline): + model = QueueItem + + +class 
QueueAdmin(admin.ModelAdmin): + model = Queue + list_display = ('name', 'label', 'total_items') + inlines = [QueueItemInline] + + def total_items(self, obj): + return obj.items.all().count() + total_items.short_description = _(u'total items') + + +admin.site.register(Queue, QueueAdmin) diff --git a/apps/queue_manager/exceptions.py b/apps/queue_manager/exceptions.py new file mode 100644 index 0000000000..fc356e3b69 --- /dev/null +++ b/apps/queue_manager/exceptions.py @@ -0,0 +1,6 @@ +class QueueException(Exception): + pass + + +class QueuePushError(QueueException): + pass diff --git a/apps/queue_manager/migrations/0001_initial.py b/apps/queue_manager/migrations/0001_initial.py new file mode 100644 index 0000000000..22215e726d --- /dev/null +++ b/apps/queue_manager/migrations/0001_initial.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +import datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + # Adding model 'Queue' + db.create_table('queue_manager_queue', ( + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('name', self.gf('django.db.models.fields.CharField')(unique=True, max_length=32)), + ('unique_names', self.gf('django.db.models.fields.BooleanField')(default=False)), + )) + db.send_create_signal('queue_manager', ['Queue']) + + # Adding model 'QueueItem' + db.create_table('queue_manager_queueitem', ( + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('queue', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['queue_manager.Queue'])), + ('creation_datetime', self.gf('django.db.models.fields.DateTimeField')()), + ('unique_name', self.gf('django.db.models.fields.CharField')(unique=True, max_length=32, blank=True)), + ('name', self.gf('django.db.models.fields.CharField')(max_length=32, blank=True)), + ('data', self.gf('django.db.models.fields.TextField')()), + )) + 
db.send_create_signal('queue_manager', ['QueueItem']) + + + def backwards(self, orm): + # Deleting model 'Queue' + db.delete_table('queue_manager_queue') + + # Deleting model 'QueueItem' + db.delete_table('queue_manager_queueitem') + + + models = { + 'queue_manager.queue': { + 'Meta': {'object_name': 'Queue'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32'}), + 'unique_names': ('django.db.models.fields.BooleanField', [], {'default': 'False'}) + }, + 'queue_manager.queueitem': { + 'Meta': {'object_name': 'QueueItem'}, + 'creation_datetime': ('django.db.models.fields.DateTimeField', [], {}), + 'data': ('django.db.models.fields.TextField', [], {}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '32', 'blank': 'True'}), + 'queue': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['queue_manager.Queue']"}), + 'unique_name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32', 'blank': 'True'}) + } + } + + complete_apps = ['queue_manager'] \ No newline at end of file diff --git a/apps/queue_manager/migrations/__init__.py b/apps/queue_manager/migrations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/apps/queue_manager/models.py b/apps/queue_manager/models.py new file mode 100755 index 0000000000..e82781723b --- /dev/null +++ b/apps/queue_manager/models.py @@ -0,0 +1,95 @@ +from __future__ import absolute_import + +from datetime import datetime + +from django.db import models +from django.utils.translation import ugettext_lazy as _ +from django.utils.simplejson import loads, dumps +from django.db import IntegrityError + +from .exceptions import QueuePushError + +queue_labels = {} + + +class QueueManager(models.Manager): + def get_or_create(self, *args, **kwargs): + 
queue_labels[kwargs.get('name')] = kwargs.get('defaults', {}).get('label') + return super(QueueManager, self).get_or_create(*args, **kwargs) + + +class Queue(models.Model): + # Internal name + name = models.CharField(max_length=32, verbose_name=_(u'name'), unique=True) + unique_names = models.BooleanField(verbose_name=_(u'unique names'), default=False) + + objects = QueueManager() + + def __unicode__(self): + return unicode(self.label) or self.name + + @property + def label(self): + return queue_labels.get(self.name) + + def push(self, data, name=None): # TODO: add replace flag + if not name: + name = u'' + queue_item = QueueItem(queue=self, name=name, data=dumps(data)) + queue_item.save() + return queue_item + + def pull(self): + queue_item_qs = QueueItem.objects.filter(queue=self).order_by('-creation_datetime') + if queue_item_qs: + queue_item = queue_item_qs[0] + queue_item.delete() + return loads(queue_item.data) + + @property + def items(self): + return self.queueitem_set + + def empty(self): + self.items.all().delete() + + def save(self, *args, **kwargs): + label = getattr(self, 'label', None) + if label: + queue_labels[self.name] = label + return super(Queue, self).save(*args, **kwargs) + + # TODO: custom runtime methods + + class Meta: + verbose_name = _(u'queue') + verbose_name_plural = _(u'queues') + + +class QueueItem(models.Model): + queue = models.ForeignKey(Queue, verbose_name=_(u'queue')) + creation_datetime = models.DateTimeField(verbose_name=_(u'creation datetime'), editable=False) + unique_name = models.CharField(blank=True, max_length=32, verbose_name=_(u'name'), unique=True, editable=False) + name = models.CharField(blank=True, max_length=32, verbose_name=_(u'name')) + data = models.TextField(verbose_name=_(u'data')) + + def __unicode__(self): + return self.name + + def save(self, *args, **kwargs): + self.creation_datetime = datetime.now() + + if self.queue.unique_names: + self.unique_name = self.name + else: + self.unique_name = 
unicode(self.creation_datetime) + try: + super(QueueItem, self).save(*args, **kwargs) + except IntegrityError: + # TODO: Maybe replace instead or rasining exception w/ replace flag + raise QueuePushError + + class Meta: + verbose_name = _(u'queue item') + verbose_name_plural = _(u'queue items') + diff --git a/apps/queue_manager/views.py b/apps/queue_manager/views.py new file mode 100755 index 0000000000..60f00ef0ef --- /dev/null +++ b/apps/queue_manager/views.py @@ -0,0 +1 @@ +# Create your views here. From d97b3f344d048c52180cca46d48f90ba729302e9 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Sun, 29 Jul 2012 05:32:17 -0400 Subject: [PATCH 02/40] Add shorthand get() class method for Singleton class --- apps/common/models.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/apps/common/models.py b/apps/common/models.py index d0b0f8414d..dc7be8316e 100644 --- a/apps/common/models.py +++ b/apps/common/models.py @@ -17,7 +17,11 @@ class Singleton(models.Model): lock_id = models.CharField(max_length=1, default=SINGLETON_LOCK_ID, editable=False, verbose_name=_(u'lock field'), unique=True) objects = SingletonManager() - + + @classmethod + def get(cls): + return cls.objects.get() + def save(self, *args, **kwargs): self.id = 1 super(Singleton, self).save(*args, **kwargs) From d2e6df4dde7a384be352602ed742a735bbb55ac4 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Sun, 29 Jul 2012 05:33:04 -0400 Subject: [PATCH 03/40] Initial changes for the new queue based OCR processing --- apps/ocr/__init__.py | 42 ++++----- apps/ocr/admin.py | 2 + apps/ocr/api.py | 2 +- apps/ocr/exceptions.py | 8 ++ apps/ocr/forms.py | 2 + apps/ocr/links.py | 24 +++-- apps/ocr/literals.py | 28 +++--- apps/ocr/managers.py | 24 ++--- apps/ocr/models.py | 111 ++++++++++++++++++---- apps/ocr/permissions.py | 2 +- apps/ocr/statistics.py | 2 +- apps/ocr/tasks.py | 8 +- apps/ocr/urls.py | 8 +- apps/ocr/views.py | 197 ++++++++++++++++++++-------------------- 14 files changed, 280 
insertions(+), 180 deletions(-) diff --git a/apps/ocr/__init__.py b/apps/ocr/__init__.py index 95ba9a023f..f824a23e9d 100644 --- a/apps/ocr/__init__.py +++ b/apps/ocr/__init__.py @@ -17,47 +17,43 @@ from project_tools.api import register_tool from acls.api import class_permissions from scheduler.api import register_interval_job from statistics.api import register_statistics +from queue_manager.models import Queue from .conf.settings import (AUTOMATIC_OCR, QUEUE_PROCESSING_INTERVAL) -from .models import DocumentQueue, QueueTransformation +from .models import OCRProcessingSingleton from .tasks import task_process_document_queues from .permissions import PERMISSION_OCR_DOCUMENT from .exceptions import AlreadyQueued from . import models as ocr_models from .statistics import get_statistics +from .literals import OCR_QUEUE_NAME logger = logging.getLogger(__name__) from .links import (submit_document, re_queue_multiple_document, - queue_document_multiple_delete, document_queue_disable, - document_queue_enable, all_document_ocr_cleanup, queue_document_list, - ocr_tool_link, setup_queue_transformation_list, - setup_queue_transformation_create, setup_queue_transformation_edit, - setup_queue_transformation_delete, submit_document_multiple) + queue_document_multiple_delete, ocr_disable, + ocr_enable, all_document_ocr_cleanup, ocr_log, + ocr_tool_link, submit_document_multiple) bind_links([Document], [submit_document]) -bind_links([DocumentQueue], [document_queue_disable, document_queue_enable, setup_queue_transformation_list]) -bind_links([QueueTransformation], [setup_queue_transformation_edit, setup_queue_transformation_delete]) +bind_links([OCRProcessingSingleton], [ocr_disable, ocr_enable]) +#bind_links([QueueTransformation], [setup_queue_transformation_edit, setup_queue_transformation_delete]) -register_multi_item_links(['queue_document_list'], [re_queue_multiple_document, queue_document_multiple_delete]) +#register_multi_item_links(['queue_document_list'], 
[re_queue_multiple_document, queue_document_multiple_delete]) -bind_links(['setup_queue_transformation_create', 'setup_queue_transformation_edit', 'setup_queue_transformation_delete', 'document_queue_disable', 'document_queue_enable', 'queue_document_list', 'setup_queue_transformation_list'], [queue_document_list], menu_name='secondary_menu') -bind_links(['setup_queue_transformation_edit', 'setup_queue_transformation_delete', 'setup_queue_transformation_list', 'setup_queue_transformation_create'], [setup_queue_transformation_create], menu_name='sidebar') +#bind_links(['setup_queue_transformation_create', 'setup_queue_transformation_edit', 'setup_queue_transformation_delete', 'document_queue_disable', 'document_queue_enable', 'queue_document_list', 'setup_queue_transformation_list'], [queue_document_list], menu_name='secondary_menu') +#bind_links(['setup_queue_transformation_edit', 'setup_queue_transformation_delete', 'setup_queue_transformation_list', 'setup_queue_transformation_create'], [setup_queue_transformation_create], menu_name='sidebar') register_maintenance_links([all_document_ocr_cleanup], namespace='ocr', title=_(u'OCR')) -register_multi_item_links(['folder_view', 'search', 'results', 'index_instance_node_view', 'document_find_duplicates', 'document_type_document_list', 'document_group_view', 'document_list', 'document_list_recent'], [submit_document_multiple]) +#register_multi_item_links(['folder_view', 'search', 'results', 'index_instance_node_view', 'document_find_duplicates', 'document_type_document_list', 'document_group_view', 'document_list', 'document_list_recent'], [submit_document_multiple]) @transaction.commit_on_success -def create_default_queue(): +def create_ocr_queue(): try: - default_queue, created = DocumentQueue.objects.get_or_create(name='default') + queue, created = Queue.objects.get_or_create(name=OCR_QUEUE_NAME, defaults={'label': _('OCR'), 'unique_names': True}) except DatabaseError: transaction.rollback() - else: - if created: - 
default_queue.label = ugettext(u'Default') - default_queue.save() @receiver(post_save, dispatch_uid='document_post_save', sender=DocumentVersion) @@ -81,11 +77,10 @@ def document_post_save(sender, instance, **kwargs): # task_process_document_queues() -@receiver(post_syncdb, dispatch_uid='create_default_queue', sender=ocr_models) -def create_default_queue_signal_handler(sender, **kwargs): - create_default_queue() +#@receiver(post_syncdb, dispatch_uid='create_ocr_queue_on_syncdb', sender=ocr_models) +#def create_ocr_queue_on_syncdb(sender, **kwargs): -register_interval_job('task_process_document_queues', _(u'Checks the OCR queue for pending documents.'), task_process_document_queues, seconds=QUEUE_PROCESSING_INTERVAL) +#register_interval_job('task_process_document_queues', _(u'Checks the OCR queue for pending documents.'), task_process_document_queues, seconds=QUEUE_PROCESSING_INTERVAL) register_tool(ocr_tool_link) @@ -93,4 +88,5 @@ class_permissions(Document, [ PERMISSION_OCR_DOCUMENT, ]) -register_statistics(get_statistics) +#register_statistics(get_statistics) +create_ocr_queue() diff --git a/apps/ocr/admin.py b/apps/ocr/admin.py index 0210faf751..1689ad7a52 100644 --- a/apps/ocr/admin.py +++ b/apps/ocr/admin.py @@ -1,3 +1,4 @@ +""" from __future__ import absolute_import from django.contrib import admin @@ -18,3 +19,4 @@ class DocumentQueueAdmin(admin.ModelAdmin): admin.site.register(DocumentQueue, DocumentQueueAdmin) +""" diff --git a/apps/ocr/api.py b/apps/ocr/api.py index 33450b0862..2cc3fad6f4 100644 --- a/apps/ocr/api.py +++ b/apps/ocr/api.py @@ -87,7 +87,7 @@ def do_document_ocr(queue_document): parser, if the parser fails or if there is no parser registered for the document mimetype do a visual OCR by calling tesseract """ - for document_page in queue_document.document.pages.all(): + for document_page in queue_document.document_version.pages.all(): try: # Try to extract text by means of a parser parse_document_page(document_page) diff --git 
a/apps/ocr/exceptions.py b/apps/ocr/exceptions.py index 32ec4c4c07..27d72374b9 100644 --- a/apps/ocr/exceptions.py +++ b/apps/ocr/exceptions.py @@ -21,3 +21,11 @@ class UnpaperError(Exception): class ReQueueError(Exception): pass + + +class OCRProcessingAlreadyDisabled(Exception): + pass + + +class OCRProcessingAlreadyEnabled(Exception): + pass diff --git a/apps/ocr/forms.py b/apps/ocr/forms.py index 19e8ea6805..0fde716bbb 100644 --- a/apps/ocr/forms.py +++ b/apps/ocr/forms.py @@ -1,3 +1,4 @@ +""" from __future__ import absolute_import from django import forms @@ -19,3 +20,4 @@ class QueueTransformationForm_create(forms.ModelForm): class Meta: model = QueueTransformation exclude = ('content_type', 'object_id') +""" diff --git a/apps/ocr/links.py b/apps/ocr/links.py index 3baf133699..5c708ae4b2 100644 --- a/apps/ocr/links.py +++ b/apps/ocr/links.py @@ -7,7 +7,18 @@ from navigation.api import Link from .permissions import (PERMISSION_OCR_DOCUMENT, PERMISSION_OCR_DOCUMENT_DELETE, PERMISSION_OCR_QUEUE_ENABLE_DISABLE, PERMISSION_OCR_CLEAN_ALL_PAGES) +from .models import OCRProcessingSingleton +def is_enabled(context): + return OCRProcessingSingleton.get().is_enabled() + +def is_disabled(context): + return not OCRProcessingSingleton.get().is_enabled() + + +ocr_log = Link(text=_(u'queue document list'), view='ocr_log', sprite='text', permissions=[PERMISSION_OCR_DOCUMENT]) +ocr_disable = Link(text=_(u'disable OCR processing'), view='ocr_disable', sprite='control_stop_blue', permissions=[PERMISSION_OCR_QUEUE_ENABLE_DISABLE], conditional_disable=is_disabled) +ocr_enable = Link(text=_(u'enable OCR processing'), view='ocr_enable', sprite='control_play_blue', permissions=[PERMISSION_OCR_QUEUE_ENABLE_DISABLE], conditional_disable=is_enabled) submit_document = Link(text=_('submit to OCR queue'), view='submit_document', args='object.id', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT]) submit_document_multiple = Link(text=_('submit to OCR queue'), 
view='submit_document_multiple', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT]) re_queue_document = Link(text=_('re-queue'), view='re_queue_document', args='object.id', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT]) @@ -15,15 +26,12 @@ re_queue_multiple_document = Link(text=_('re-queue'), view='re_queue_multiple_do queue_document_delete = Link(text=_(u'delete'), view='queue_document_delete', args='object.id', sprite='hourglass_delete', permissions=[PERMISSION_OCR_DOCUMENT_DELETE]) queue_document_multiple_delete = Link(text=_(u'delete'), view='queue_document_multiple_delete', sprite='hourglass_delete', permissions=[PERMISSION_OCR_DOCUMENT_DELETE]) -document_queue_disable = Link(text=_(u'stop queue'), view='document_queue_disable', args='queue.id', sprite='control_stop_blue', permissions=[PERMISSION_OCR_QUEUE_ENABLE_DISABLE]) -document_queue_enable = Link(text=_(u'activate queue'), view='document_queue_enable', args='queue.id', sprite='control_play_blue', permissions=[PERMISSION_OCR_QUEUE_ENABLE_DISABLE]) all_document_ocr_cleanup = Link(text=_(u'clean up pages content'), view='all_document_ocr_cleanup', sprite='text_strikethrough', permissions=[PERMISSION_OCR_CLEAN_ALL_PAGES], description=_(u'Runs a language filter to remove common OCR mistakes from document pages content.')) -queue_document_list = Link(text=_(u'queue document list'), view='queue_document_list', sprite='hourglass', permissions=[PERMISSION_OCR_DOCUMENT]) -ocr_tool_link = Link(text=_(u'OCR'), view='queue_document_list', sprite='hourglass', icon='text.png', permissions=[PERMISSION_OCR_DOCUMENT], children_view_regex=[r'queue_', r'document_queue']) +ocr_tool_link = Link(text=_(u'OCR'), view='ocr_log', sprite='hourglass', icon='text.png', permissions=[PERMISSION_OCR_DOCUMENT]) # children_view_regex=[r'queue_', r'document_queue']) -setup_queue_transformation_list = Link(text=_(u'transformations'), view='setup_queue_transformation_list', args='queue.pk', 
sprite='shape_move_front') -setup_queue_transformation_create = Link(text=_(u'add transformation'), view='setup_queue_transformation_create', args='queue.pk', sprite='shape_square_add') -setup_queue_transformation_edit = Link(text=_(u'edit'), view='setup_queue_transformation_edit', args='transformation.pk', sprite='shape_square_edit') -setup_queue_transformation_delete = Link(text=_(u'delete'), view='setup_queue_transformation_delete', args='transformation.pk', sprite='shape_square_delete') +#setup_queue_transformation_list = Link(text=_(u'transformations'), view='setup_queue_transformation_list', args='queue.pk', sprite='shape_move_front') +#setup_queue_transformation_create = Link(text=_(u'add transformation'), view='setup_queue_transformation_create', args='queue.pk', sprite='shape_square_add') +#setup_queue_transformation_edit = Link(text=_(u'edit'), view='setup_queue_transformation_edit', args='transformation.pk', sprite='shape_square_edit') +#setup_queue_transformation_delete = Link(text=_(u'delete'), view='setup_queue_transformation_delete', args='transformation.pk', sprite='shape_square_delete') diff --git a/apps/ocr/literals.py b/apps/ocr/literals.py index 946c063e38..761cd017d5 100644 --- a/apps/ocr/literals.py +++ b/apps/ocr/literals.py @@ -1,25 +1,27 @@ from django.utils.translation import ugettext_lazy as _ -DOCUMENTQUEUE_STATE_STOPPED = 's' -DOCUMENTQUEUE_STATE_ACTIVE = 'a' +OCR_STATE_DISABLED = 'd' +OCR_STATE_ENABLED = 'e' -DOCUMENTQUEUE_STATE_CHOICES = ( - (DOCUMENTQUEUE_STATE_STOPPED, _(u'stopped')), - (DOCUMENTQUEUE_STATE_ACTIVE, _(u'active')), +OCR_STATE_CHOICES = ( + (OCR_STATE_DISABLED, _(u'disabled')), + (OCR_STATE_ENABLED, _(u'enabled')), ) -QUEUEDOCUMENT_STATE_PENDING = 'p' -QUEUEDOCUMENT_STATE_PROCESSING = 'i' -QUEUEDOCUMENT_STATE_ERROR = 'e' +#QUEUEDOCUMENT_STATE_PENDING = 'p' +#QUEUEDOCUMENT_STATE_PROCESSING = 'i' +#QUEUEDOCUMENT_STATE_ERROR = 'e' -QUEUEDOCUMENT_STATE_CHOICES = ( - (QUEUEDOCUMENT_STATE_PENDING, _(u'pending')), - 
(QUEUEDOCUMENT_STATE_PROCESSING, _(u'processing')), - (QUEUEDOCUMENT_STATE_ERROR, _(u'error')), -) +#QUEUEDOCUMENT_STATE_CHOICES = ( +# (QUEUEDOCUMENT_STATE_PENDING, _(u'pending')), +# (QUEUEDOCUMENT_STATE_PROCESSING, _(u'processing')), +# (QUEUEDOCUMENT_STATE_ERROR, _(u'error')), +#) DEFAULT_OCR_FILE_FORMAT = u'tiff' DEFAULT_OCR_FILE_EXTENSION = u'tif' UNPAPER_FILE_FORMAT = u'ppm' + +OCR_QUEUE_NAME = 'ocr' diff --git a/apps/ocr/managers.py b/apps/ocr/managers.py index b4596356d6..8e3946e0b5 100644 --- a/apps/ocr/managers.py +++ b/apps/ocr/managers.py @@ -2,19 +2,19 @@ from __future__ import absolute_import from django.db import models -from .exceptions import AlreadyQueued +#from .exceptions import AlreadyQueued -class DocumentQueueManager(models.Manager): - ''' - Module manager class to handle adding documents to an OCR document - queue - ''' - def queue_document(self, document, queue_name='default'): - document_queue = self.model.objects.get(name=queue_name) - if document_queue.queuedocument_set.filter(document=document): - raise AlreadyQueued +class OCRProcessingManager(models.Manager): + """ + Module manager class to handle adding documents to an OCR queue + """ + def queue_document(self, document): + pass + #document_queue = self.model.objects.get(name=queue_name) + #if document_queue.queuedocument_set.filter(document_version=document.latest_version): + # raise AlreadyQueued - document_queue.queuedocument_set.create(document=document, delay=True) + #document_queue.queuedocument_set.create(document_version=document.latest_version, delay=True) - return document_queue + #return document_queue diff --git a/apps/ocr/models.py b/apps/ocr/models.py index 8a77d12928..3cecf15951 100644 --- a/apps/ocr/models.py +++ b/apps/ocr/models.py @@ -1,7 +1,7 @@ from __future__ import absolute_import from ast import literal_eval -from datetime import datetime +import datetime from django.db import models from django.utils.translation import ugettext_lazy as _ @@ -11,35 +11,109 @@ 
from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes import generic from django.core.exceptions import ValidationError -from documents.models import Document +from common.models import Singleton +from documents.models import Document, DocumentVersion from converter.api import get_available_transformations_choices from sources.managers import SourceTransformationManager -from .literals import (DOCUMENTQUEUE_STATE_CHOICES, - QUEUEDOCUMENT_STATE_PENDING, QUEUEDOCUMENT_STATE_CHOICES, - QUEUEDOCUMENT_STATE_PROCESSING, DOCUMENTQUEUE_STATE_ACTIVE) -from .managers import DocumentQueueManager -from .exceptions import ReQueueError +from .literals import (OCR_STATE_CHOICES, OCR_STATE_ENABLED, + OCR_STATE_DISABLED) +from .managers import OCRProcessingManager +from .exceptions import (ReQueueError, OCRProcessingAlreadyDisabled, + OCRProcessingAlreadyEnabled) -class DocumentQueue(models.Model): - name = models.CharField(max_length=64, unique=True, verbose_name=_(u'name')) - label = models.CharField(max_length=64, verbose_name=_(u'label')) +class OCRProcessingSingleton(Singleton): state = models.CharField(max_length=4, - choices=DOCUMENTQUEUE_STATE_CHOICES, - default=DOCUMENTQUEUE_STATE_ACTIVE, + choices=OCR_STATE_CHOICES, + default=OCR_STATE_ENABLED, verbose_name=_(u'state')) - objects = DocumentQueueManager() - - class Meta: - verbose_name = _(u'document queue') - verbose_name_plural = _(u'document queues') + #objects = AnonymousUserSingletonManager() def __unicode__(self): - return self.label + return ugettext('OCR processing') + + def disable(self): + if self.state == OCR_STATE_DISABLED: + raise OCRProcessingAlreadyDisabled + + self.state = OCR_STATE_DISABLED + self.save() + + def enable(self): + if self.state == OCR_STATE_ENABLED: + raise OCRProcessingAlreadyEnabled + + self.state = OCR_STATE_ENABLED + self.save() + + def is_enabled(self): + return self.state == OCR_STATE_ENABLED + + class Meta: + verbose_name = verbose_name_plural = 
_(u'OCR processing properties') + +""" +class OCRLog(models.Model): + #queue = models.ForeignKey(Queue, verbose_name=_(u'queue')) + document_version = models.ForeignKey(DocumentVersion, verbose_name=_(u'document version')) + datetime = models.DateTimeField(verbose_name=_(u'date time'), default=lambda: datetime.datetime.now(), db_index=True) + delay = models.BooleanField(verbose_name=_(u'delay OCR'), default=False) + #state = models.CharField(max_length=4, + # choices=QUEUEDOCUMENT_STATE_CHOICES, + # default=QUEUEDOCUMENT_STATE_PENDING, + # verbose_name=_(u'state')) + result = models.TextField(blank=True, null=True, verbose_name=_(u'result')) + #node_name = models.CharField(max_length=32, verbose_name=_(u'node name'), blank=True, null=True) + + class Meta: + ordering = ('datetime',) + verbose_name = _(u'OCR log entry') + verbose_name_plural = _(u'OCR log entries') + + #def get_transformation_list(self): + # return QueueTransformation.transformations.get_for_object_as_list(self) + + def requeue(self): + pass + #if self.state == QUEUEDOCUMENT_STATE_PROCESSING: + # raise ReQueueError + #else: + # self.datetime_submitted = datetime.now() + # self.state = QUEUEDOCUMENT_STATE_PENDING + # self.delay = False + # self.result = None + # self.node_name = None + # self.save() + + def __unicode__(self): + try: + return unicode(self.document) + except ObjectDoesNotExist: + return ugettext(u'Missing document.') +""" + +#class DocumentQueue(models.Model): +# name = models.CharField(max_length=64, unique=True, verbose_name=_(u'name')) +# label = models.CharField(max_length=64, verbose_name=_(u'label')) +# state = models.CharField(max_length=4, +# choices=DOCUMENTQUEUE_STATE_CHOICES, +# default=DOCUMENTQUEUE_STATE_ACTIVE, +# verbose_name=_(u'state')) +# +# objects = DocumentQueueManager()# +# +# class Meta: +# verbose_name = _(u'document queue') +# verbose_name_plural = _(u'document queues')# +# +# def __unicode__(self): +# return self.label + +""" class QueueDocument(models.Model): 
document_queue = models.ForeignKey(DocumentQueue, verbose_name=_(u'document queue')) document = models.ForeignKey(Document, verbose_name=_(u'document')) @@ -121,3 +195,4 @@ class QueueTransformation(models.Model): ordering = ('order',) verbose_name = _(u'document queue transformation') verbose_name_plural = _(u'document queue transformations') +""" diff --git a/apps/ocr/permissions.py b/apps/ocr/permissions.py index f74f1ec267..17f7a5de7a 100644 --- a/apps/ocr/permissions.py +++ b/apps/ocr/permissions.py @@ -7,6 +7,6 @@ from permissions.models import Permission, PermissionNamespace ocr_namespace = PermissionNamespace('ocr', _(u'OCR')) PERMISSION_OCR_DOCUMENT = Permission.objects.register(ocr_namespace, 'ocr_document', _(u'Submit documents for OCR')) PERMISSION_OCR_DOCUMENT_DELETE = Permission.objects.register(ocr_namespace, 'ocr_document_delete', _(u'Delete documents from OCR queue')) -PERMISSION_OCR_QUEUE_ENABLE_DISABLE = Permission.objects.register(ocr_namespace, 'ocr_queue_enable_disable', _(u'Can enable/disable the OCR queue')) +PERMISSION_OCR_QUEUE_ENABLE_DISABLE = Permission.objects.register(ocr_namespace, 'ocr_queue_enable_disable', _(u'Can enable/disable the OCR processing')) PERMISSION_OCR_CLEAN_ALL_PAGES = Permission.objects.register(ocr_namespace, 'ocr_clean_all_pages', _(u'Can execute the OCR clean up on all document pages')) PERMISSION_OCR_QUEUE_EDIT = Permission.objects.register(ocr_namespace, 'ocr_queue_edit', _(u'Can edit an OCR queue properties')) diff --git a/apps/ocr/statistics.py b/apps/ocr/statistics.py index 590075c719..bef113303e 100644 --- a/apps/ocr/statistics.py +++ b/apps/ocr/statistics.py @@ -2,7 +2,7 @@ from __future__ import absolute_import from django.utils.translation import ugettext as _ -from .models import DocumentQueue, QueueDocument +#from .models import DocumentQueue, QueueDocument def get_statistics(): diff --git a/apps/ocr/tasks.py b/apps/ocr/tasks.py index 0a0d8ab1e6..9780c8b2a8 100644 --- a/apps/ocr/tasks.py +++ 
b/apps/ocr/tasks.py @@ -10,10 +10,10 @@ from job_processor.api import process_job from lock_manager import Lock, LockError from .api import do_document_ocr -from .literals import (QUEUEDOCUMENT_STATE_PENDING, - QUEUEDOCUMENT_STATE_PROCESSING, DOCUMENTQUEUE_STATE_ACTIVE, - QUEUEDOCUMENT_STATE_ERROR) -from .models import QueueDocument, DocumentQueue +#from .literals import (QUEUEDOCUMENT_STATE_PENDING, +# QUEUEDOCUMENT_STATE_PROCESSING, DOCUMENTQUEUE_STATE_ACTIVE, +# QUEUEDOCUMENT_STATE_ERROR) +#from .models import QueueDocument, DocumentQueue from .conf.settings import NODE_CONCURRENT_EXECUTION, REPLICATION_DELAY LOCK_EXPIRE = 60 * 10 # Lock expires in 10 minutes diff --git a/apps/ocr/urls.py b/apps/ocr/urls.py index d77be818f8..b652f800c7 100644 --- a/apps/ocr/urls.py +++ b/apps/ocr/urls.py @@ -1,16 +1,18 @@ from django.conf.urls.defaults import patterns, url urlpatterns = patterns('ocr.views', + url(r'^log/$', 'ocr_log', (), 'ocr_log'), + + url(r'^processing/enable/$', 'ocr_enable', (), 'ocr_enable'), + url(r'^processing/disable/$', 'ocr_disable', (), 'ocr_disable'), + url(r'^document/(?P\d+)/submit/$', 'submit_document', (), 'submit_document'), url(r'^document/multiple/submit/$', 'submit_document_multiple', (), 'submit_document_multiple'), - url(r'^queue/document/list/$', 'queue_document_list', (), 'queue_document_list'), url(r'^queue/document/(?P\d+)/delete/$', 'queue_document_delete', (), 'queue_document_delete'), url(r'^queue/document/multiple/delete/$', 'queue_document_multiple_delete', (), 'queue_document_multiple_delete'), url(r'^queue/document/(?P\d+)/re-queue/$', 're_queue_document', (), 're_queue_document'), url(r'^queue/document/multiple/re-queue/$', 're_queue_multiple_document', (), 're_queue_multiple_document'), - url(r'^queue/(?P\d+)/enable/$', 'document_queue_enable', (), 'document_queue_enable'), - url(r'^queue/(?P\d+)/disable/$', 'document_queue_disable', (), 'document_queue_disable'), url(r'^document/all/clean_up/$', 'all_document_ocr_cleanup', 
(), 'all_document_ocr_cleanup'), diff --git a/apps/ocr/views.py b/apps/ocr/views.py index 3af18eb59c..d4dcb109d6 100644 --- a/apps/ocr/views.py +++ b/apps/ocr/views.py @@ -18,52 +18,111 @@ from acls.models import AccessEntry from .permissions import (PERMISSION_OCR_DOCUMENT, PERMISSION_OCR_DOCUMENT_DELETE, PERMISSION_OCR_QUEUE_ENABLE_DISABLE, PERMISSION_OCR_CLEAN_ALL_PAGES, PERMISSION_OCR_QUEUE_EDIT) -from .models import DocumentQueue, QueueDocument, QueueTransformation -from .literals import (QUEUEDOCUMENT_STATE_PROCESSING, - DOCUMENTQUEUE_STATE_ACTIVE, DOCUMENTQUEUE_STATE_STOPPED) -from .exceptions import AlreadyQueued, ReQueueError +from .models import OCRProcessingSingleton +#from .literals import (QUEUEDOCUMENT_STATE_PROCESSING, +# DOCUMENTQUEUE_STATE_ACTIVE, DOCUMENTQUEUE_STATE_STOPPED) +from .exceptions import (AlreadyQueued, ReQueueError, OCRProcessingAlreadyDisabled, + OCRProcessingAlreadyEnabled) from .api import clean_pages -from .forms import QueueTransformationForm, QueueTransformationForm_create +#from .forms import QueueTransformationForm, QueueTransformationForm_create -def queue_document_list(request, queue_name='default'): +def ocr_log(request): Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT]) - document_queue = get_object_or_404(DocumentQueue, name=queue_name) - - return object_list( - request, - queryset=document_queue.queuedocument_set.all(), - template_name='generic_list.html', - extra_context={ - 'title': _(u'documents in queue: %s') % document_queue, - 'hide_object': True, - 'queue': document_queue, - 'object_name': _(u'document queue'), - 'navigation_object_name': 'queue', - 'list_object_variable_name': 'queue_document', - 'extra_columns': [ - {'name': 'document', 'attribute': encapsulate(lambda x: document_link(x.document) if hasattr(x, 'document') else _(u'Missing document.'))}, - {'name': _(u'thumbnail'), 'attribute': encapsulate(lambda x: document_thumbnail(x.document))}, - {'name': 'submitted', 'attribute': 
encapsulate(lambda x: unicode(x.datetime_submitted).split('.')[0]), 'keep_together':True}, - {'name': 'delay', 'attribute': 'delay'}, - {'name': 'state', 'attribute': encapsulate(lambda x: x.get_state_display())}, - {'name': 'node', 'attribute': 'node_name'}, - {'name': 'result', 'attribute': 'result'}, - ], - 'multi_select_as_buttons': True, - 'sidebar_subtemplates_list': [ - { - 'name': 'generic_subtemplate.html', - 'context': { - 'side_bar': True, - 'title': _(u'document queue properties'), - 'content': _(u'Current state: %s') % document_queue.get_state_display(), - } + context = { + 'queue': OCRProcessingSingleton.get(), + 'object_name': _(u'OCR processing'), # TODO fix, not working + 'navigation_object_name': 'queue', + 'object_list': [], + 'title': _(u'OCR log items'), + #'hide_object': True, + #'hide_link': True, + 'extra_columns': [ + {'name': _(u'document'), 'attribute': encapsulate(lambda x: document_link(x.document_version.document) if hasattr(x, 'document_version') else _(u'Missing document.'))}, + {'name': _(u'version'), 'attribute': 'document_version'}, + {'name': _(u'thumbnail'), 'attribute': encapsulate(lambda x: document_thumbnail(x.document_version.document))}, + {'name': _('submitted'), 'attribute': encapsulate(lambda x: unicode(x.datetime_submitted).split('.')[0]), 'keep_together':True}, + #{'name': _('delay'), 'attribute': 'delay'}, + #{'name': _('state'), 'attribute': encapsulate(lambda x: x.get_state_display())}, + #{'name': _('node'), 'attribute': 'node_name'}, + {'name': _('result'), 'attribute': 'result'}, + ], + 'multi_select_as_buttons': True, + 'sidebar_subtemplates_list': [ + { + 'name': 'generic_subtemplate.html', + 'context': { + 'side_bar': True, + 'title': _(u'OCR processing properties'), + 'content': _(u'Current state: %s') % OCRProcessingSingleton.get().get_state_display(), } - ] - }, - ) + } + ] + } + + return render_to_response('generic_list.html', context, + context_instance=RequestContext(request)) + + # 'queue': 
document_queue, + # 'object_name': _(u'document queue'), + # 'navigation_object_name': 'queue', + # 'list_object_variable_name': 'queue_document', + # }, + #) + + +def ocr_disable(request): + Permission.objects.check_permissions(request.user, [PERMISSION_OCR_QUEUE_ENABLE_DISABLE]) + + next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None))) + previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None))) + + if request.method == 'POST': + try: + OCRProcessingSingleton.get().disable() + except OCRProcessingAlreadyDisabled: + messages.warning(request, _(u'OCR processing already disabled.')) + return HttpResponseRedirect(previous) + else: + messages.success(request, _(u'OCR processing disabled successfully.')) + return HttpResponseRedirect(next) + + return render_to_response('generic_confirm.html', { + 'queue': OCRProcessingSingleton.get(), + 'navigation_object_name': 'queue', + 'title': _(u'Are you sure you wish to disable OCR processing?'), + 'next': next, + 'previous': previous, + 'form_icon': u'control_stop_blue.png', + }, context_instance=RequestContext(request)) + + +def ocr_enable(request): + Permission.objects.check_permissions(request.user, [PERMISSION_OCR_QUEUE_ENABLE_DISABLE]) + + next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None))) + previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None))) + + if request.method == 'POST': + try: + OCRProcessingSingleton.get().enable() + except OCRProcessingAlreadyEnabled: + messages.warning(request, _(u'OCR processing already enabled.')) + return HttpResponseRedirect(previous) + else: + messages.success(request, _(u'OCR processing enabled successfully.')) + return HttpResponseRedirect(next) + + return render_to_response('generic_confirm.html', { + 'queue': OCRProcessingSingleton.get(), + 'navigation_object_name': 'queue', + 'title': _(u'Are 
you sure you wish to enable OCR processing?'), + 'next': next, + 'previous': previous, + 'form_icon': u'control_play_blue.png', + }, context_instance=RequestContext(request)) + def queue_document_delete(request, queue_document_id=None, queue_document_id_list=None): @@ -175,12 +234,12 @@ def re_queue_document(request, queue_document_id=None, queue_document_id_list=No messages.success( request, _(u'Document: %(document)s was re-queued to the OCR queue: %(queue)s') % { - 'document': queue_document.document, + 'document': queue_document.document_version.document, 'queue': queue_document.document_queue.label } ) except Document.DoesNotExist: - messages.error(request, _(u'Document id#: %d, no longer exists.') % queue_document.document_id) + messages.error(request, _(u'Document no longer in queue.')) except ReQueueError: messages.warning( request, @@ -208,60 +267,6 @@ def re_queue_multiple_document(request): return re_queue_document(request, queue_document_id_list=request.GET.get('id_list', [])) -def document_queue_disable(request, document_queue_id): - Permission.objects.check_permissions(request.user, [PERMISSION_OCR_QUEUE_ENABLE_DISABLE]) - - next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None))) - previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None))) - document_queue = get_object_or_404(DocumentQueue, pk=document_queue_id) - - if document_queue.state == DOCUMENTQUEUE_STATE_STOPPED: - messages.warning(request, _(u'Document queue: %s, already stopped.') % document_queue) - return HttpResponseRedirect(previous) - - if request.method == 'POST': - document_queue.state = DOCUMENTQUEUE_STATE_STOPPED - document_queue.save() - messages.success(request, _(u'Document queue: %s, stopped successfully.') % document_queue) - return HttpResponseRedirect(next) - - return render_to_response('generic_confirm.html', { - 'queue': document_queue, - 'navigation_object_name': 'queue', - 'title': 
_(u'Are you sure you wish to disable document queue: %s') % document_queue, - 'next': next, - 'previous': previous, - 'form_icon': u'control_stop_blue.png', - }, context_instance=RequestContext(request)) - - -def document_queue_enable(request, document_queue_id): - Permission.objects.check_permissions(request.user, [PERMISSION_OCR_QUEUE_ENABLE_DISABLE]) - - next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None))) - previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None))) - document_queue = get_object_or_404(DocumentQueue, pk=document_queue_id) - - if document_queue.state == DOCUMENTQUEUE_STATE_ACTIVE: - messages.warning(request, _(u'Document queue: %s, already active.') % document_queue) - return HttpResponseRedirect(previous) - - if request.method == 'POST': - document_queue.state = DOCUMENTQUEUE_STATE_ACTIVE - document_queue.save() - messages.success(request, _(u'Document queue: %s, activated successfully.') % document_queue) - return HttpResponseRedirect(next) - - return render_to_response('generic_confirm.html', { - 'queue': document_queue, - 'navigation_object_name': 'queue', - 'title': _(u'Are you sure you wish to activate document queue: %s') % document_queue, - 'next': next, - 'previous': previous, - 'form_icon': u'control_play_blue.png', - }, context_instance=RequestContext(request)) - - def all_document_ocr_cleanup(request): Permission.objects.check_permissions(request.user, [PERMISSION_OCR_CLEAN_ALL_PAGES]) From b15f5538e12dbfbfda22d1b34b911c39db006dc7 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Sun, 29 Jul 2012 05:33:37 -0400 Subject: [PATCH 04/40] Add OCR migration and doc update --- ...euedocument__del_queuetransformation__a.py | 77 +++++++++++++++++++ docs/releases/0.13.rst | 1 + 2 files changed, 78 insertions(+) create mode 100644 apps/ocr/migrations/0002_auto__del_documentqueue__del_queuedocument__del_queuetransformation__a.py diff --git 
a/apps/ocr/migrations/0002_auto__del_documentqueue__del_queuedocument__del_queuetransformation__a.py b/apps/ocr/migrations/0002_auto__del_documentqueue__del_queuedocument__del_queuetransformation__a.py new file mode 100644 index 0000000000..8317488788 --- /dev/null +++ b/apps/ocr/migrations/0002_auto__del_documentqueue__del_queuedocument__del_queuetransformation__a.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +import datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + # Deleting model 'DocumentQueue' + db.delete_table('ocr_documentqueue') + + # Deleting model 'QueueDocument' + db.delete_table('ocr_queuedocument') + + # Deleting model 'QueueTransformation' + db.delete_table('ocr_queuetransformation') + + # Adding model 'OCRProcessingSingleton' + db.create_table('ocr_ocrprocessingsingleton', ( + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('lock_id', self.gf('django.db.models.fields.CharField')(default=1, unique=True, max_length=1)), + ('state', self.gf('django.db.models.fields.CharField')(default='a', max_length=4)), + )) + db.send_create_signal('ocr', ['OCRProcessingSingleton']) + + + def backwards(self, orm): + # Adding model 'DocumentQueue' + db.create_table('ocr_documentqueue', ( + ('state', self.gf('django.db.models.fields.CharField')(default='a', max_length=4)), + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('name', self.gf('django.db.models.fields.CharField')(max_length=64, unique=True)), + ('label', self.gf('django.db.models.fields.CharField')(max_length=64)), + )) + db.send_create_signal('ocr', ['DocumentQueue']) + + # Adding model 'QueueDocument' + db.create_table('ocr_queuedocument', ( + ('delay', self.gf('django.db.models.fields.BooleanField')(default=False)), + ('state', self.gf('django.db.models.fields.CharField')(default='p', max_length=4)), + ('result', 
self.gf('django.db.models.fields.TextField')(null=True, blank=True)), + ('datetime_submitted', self.gf('django.db.models.fields.DateTimeField')(auto_now_add=True, blank=True, db_index=True)), + ('document_queue', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['ocr.DocumentQueue'])), + ('document_version', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['documents.DocumentVersion'])), + ('document', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['documents.Document'])), + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('node_name', self.gf('django.db.models.fields.CharField')(max_length=32, null=True, blank=True)), + )) + db.send_create_signal('ocr', ['QueueDocument']) + + # Adding model 'QueueTransformation' + db.create_table('ocr_queuetransformation', ( + ('object_id', self.gf('django.db.models.fields.PositiveIntegerField')()), + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('arguments', self.gf('django.db.models.fields.TextField')(null=True, blank=True)), + ('content_type', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['contenttypes.ContentType'])), + ('order', self.gf('django.db.models.fields.PositiveIntegerField')(default=0, null=True, blank=True, db_index=True)), + ('transformation', self.gf('django.db.models.fields.CharField')(max_length=128)), + )) + db.send_create_signal('ocr', ['QueueTransformation']) + + # Deleting model 'OCRProcessingSingleton' + db.delete_table('ocr_ocrprocessingsingleton') + + + models = { + 'ocr.ocrprocessingsingleton': { + 'Meta': {'object_name': 'OCRProcessingSingleton'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'lock_id': ('django.db.models.fields.CharField', [], {'default': '1', 'unique': 'True', 'max_length': '1'}), + 'state': ('django.db.models.fields.CharField', [], {'default': "'a'", 'max_length': '4'}) + } + } + + complete_apps = ['ocr'] \ No newline at end of file diff --git 
a/docs/releases/0.13.rst b/docs/releases/0.13.rst index 027d42f17b..68afd7b86f 100644 --- a/docs/releases/0.13.rst +++ b/docs/releases/0.13.rst @@ -73,6 +73,7 @@ Afterwards migrate existing database schema with:: $ ./manage.py migrate metadata 0001 --fake $ ./manage.py migrate acls 0001 --fake $ ./manage.py migrate ocr 0001 --fake + $ ./manage.py migrate ocr $ ./manage.py migrate history 0001 --fake $ ./manage.py migrate tags 0001 --fake $ ./manage.py migrate linking 0001 --fake From 20856b1589825756cd9a0236a20cc902dec0ca1c Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Sun, 29 Jul 2012 05:34:13 -0400 Subject: [PATCH 05/40] Add queue manager app to installed apps --- settings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/settings.py b/settings.py index fa4bfb8ea5..e9d25d00c7 100644 --- a/settings.py +++ b/settings.py @@ -154,6 +154,7 @@ INSTALLED_APPS = ( 'navigation', 'lock_manager', 'web_theme', + 'queue_manager', # pagination needs to go after web_theme so that the pagination template is found 'pagination', 'common', From 486f983d4b2d72ad561d69c7c0f97ec411a4ea0f Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 07:36:02 -0400 Subject: [PATCH 06/40] Refactor job processing app to do actual job queue and job subprocess launching, remove queue mananger app, update ocr app to use new job processing app --- apps/bootstrap/api.py | 14 +- apps/job_processor/__init__.py | 13 + apps/job_processor/admin.py | 34 +++ apps/job_processor/api.py | 2 - apps/job_processor/exceptions.py | 14 + apps/job_processor/literals.py | 19 ++ apps/job_processor/models.py | 240 +++++++++++++++++- apps/job_processor/tasks.py | 58 +++++ apps/lock_manager/models.py | 7 +- apps/ocr/__init__.py | 26 +- apps/ocr/admin.py | 22 -- apps/ocr/api.py | 10 +- apps/ocr/forms.py | 23 -- apps/ocr/links.py | 5 - apps/ocr/literals.py | 11 - apps/ocr/models.py | 143 ----------- apps/ocr/tasks.py | 75 ------ apps/ocr/views.py | 17 +- apps/queue_manager/__init__.py | 20 -- 
apps/queue_manager/admin.py | 22 -- apps/queue_manager/exceptions.py | 6 - apps/queue_manager/migrations/0001_initial.py | 57 ----- apps/queue_manager/migrations/__init__.py | 0 apps/queue_manager/models.py | 95 ------- apps/queue_manager/views.py | 1 - apps/scheduler/__init__.py | 3 + settings.py | 3 +- 27 files changed, 409 insertions(+), 531 deletions(-) create mode 100644 apps/job_processor/admin.py delete mode 100644 apps/job_processor/api.py create mode 100644 apps/job_processor/exceptions.py create mode 100644 apps/job_processor/literals.py create mode 100644 apps/job_processor/tasks.py delete mode 100644 apps/ocr/admin.py delete mode 100644 apps/ocr/forms.py delete mode 100644 apps/ocr/tasks.py delete mode 100755 apps/queue_manager/__init__.py delete mode 100755 apps/queue_manager/admin.py delete mode 100644 apps/queue_manager/exceptions.py delete mode 100644 apps/queue_manager/migrations/0001_initial.py delete mode 100644 apps/queue_manager/migrations/__init__.py delete mode 100755 apps/queue_manager/models.py delete mode 100755 apps/queue_manager/views.py diff --git a/apps/bootstrap/api.py b/apps/bootstrap/api.py index 9f9f37fd7c..5d04156f71 100644 --- a/apps/bootstrap/api.py +++ b/apps/bootstrap/api.py @@ -9,13 +9,13 @@ from documents.models import DocumentType, DocumentTypeFilename, Document from metadata.models import MetadataType, MetadataSet from document_indexing.models import Index, IndexTemplateNode from sources.models import WebForm, StagingFolder -from ocr.models import QueueDocument, QueueTransformation, DocumentQueue from history.models import History from taggit.models import Tag from tags.models import TagProperties from folders.models import Folder from dynamic_search.models import RecentSearch from django_gpg.runtime import gpg +# TODO: clear the job queues bootstrap_options = {} @@ -63,18 +63,6 @@ def nuke_database(): for obj in Role.objects.all(): obj.delete() - # Delete all document in the ocr queue - for obj in 
QueueDocument.objects.all(): - obj.delete() - - # Delete all the transformations for a queue - for obj in QueueTransformation.objects.all(): - obj.delete() - - # Delete all the ocr document queues - for obj in DocumentQueue.objects.all(): - obj.delete() - # Delete all the remaining history events for obj in History.objects.all(): obj.delete() diff --git a/apps/job_processor/__init__.py b/apps/job_processor/__init__.py index e69de29bb2..0d06afc514 100644 --- a/apps/job_processor/__init__.py +++ b/apps/job_processor/__init__.py @@ -0,0 +1,13 @@ +from __future__ import absolute_import + +from django.utils.translation import ugettext_lazy as _ + +from scheduler.api import register_interval_job + +from .tasks import refresh_node, job_queue_poll + +NODE_REFRESH_INTERVAL = 1 +JOB_QUEUE_POLL_INTERVAL = 1 + +register_interval_job('refresh_node', _(u'Update a node\'s properties.'), refresh_node, seconds=NODE_REFRESH_INTERVAL) +register_interval_job('job_queue_poll', _(u'Poll a job queue for pending jobs.'), job_queue_poll, seconds=JOB_QUEUE_POLL_INTERVAL) diff --git a/apps/job_processor/admin.py b/apps/job_processor/admin.py new file mode 100644 index 0000000000..29d0535763 --- /dev/null +++ b/apps/job_processor/admin.py @@ -0,0 +1,34 @@ +from __future__ import absolute_import + +from django.contrib import admin +from django.utils.translation import ugettext_lazy as _ + +from .models import Node, JobQueue, JobQueueItem, Worker + + +class WorkerInline(admin.StackedInline): + list_display = ('name', 'creation_datetime', 'state') + model = Worker + + +class NodeAdmin(admin.ModelAdmin): + list_display = ('hostname', 'cpuload', 'heartbeat', 'memory_usage') + inlines = [WorkerInline] + + +class JobQueueItemInline(admin.StackedInline): + model = JobQueueItem + + +class JobQueueAdmin(admin.ModelAdmin): + model = JobQueue + list_display = ('name', 'label', 'total_items') + inlines = [JobQueueItemInline] + + def total_items(self, obj): + return obj.items.all().count() + 
total_items.short_description = _(u'total items') + + +admin.site.register(Node, NodeAdmin) +admin.site.register(JobQueue, JobQueueAdmin) diff --git a/apps/job_processor/api.py b/apps/job_processor/api.py deleted file mode 100644 index 00b9736fef..0000000000 --- a/apps/job_processor/api.py +++ /dev/null @@ -1,2 +0,0 @@ -def process_job(func, *args, **kwargs): - return func(*args, **kwargs) diff --git a/apps/job_processor/exceptions.py b/apps/job_processor/exceptions.py new file mode 100644 index 0000000000..bac36e7b37 --- /dev/null +++ b/apps/job_processor/exceptions.py @@ -0,0 +1,14 @@ +#class WorkerAlreadyDisabled(Exception): +# pass + + +#class WorkerAlreadyEnabled(Exception): +# pass + + +class JobQueuePushError(Exception): + pass + + +class JobQueueNoPendingJobs(Exception): + pass diff --git a/apps/job_processor/literals.py b/apps/job_processor/literals.py new file mode 100644 index 0000000000..a8a1bdf7aa --- /dev/null +++ b/apps/job_processor/literals.py @@ -0,0 +1,19 @@ +from django.utils.translation import ugettext_lazy as _ + +WORKER_STATE_RUNNING = 'r' +WORKER_STATE_DEAD = 'd' + +WORKER_STATE_CHOICES = ( + (WORKER_STATE_RUNNING, _(u'running')), + (WORKER_STATE_DEAD, _(u'dead')), +) + +JOB_STATE_PENDING = 'p' +JOB_STATE_PROCESSING = 'r' +JOB_STATE_ERROR = 'e' + +JOB_STATE_CHOICES = ( + (JOB_STATE_PENDING, _(u'pending')), + (JOB_STATE_PROCESSING, _(u'processing')), + (JOB_STATE_ERROR, _(u'error')), +) diff --git a/apps/job_processor/models.py b/apps/job_processor/models.py index 71a8362390..90859397a6 100644 --- a/apps/job_processor/models.py +++ b/apps/job_processor/models.py @@ -1,3 +1,239 @@ -from django.db import models +from __future__ import absolute_import -# Create your models here. 
+import os +import datetime +import uuid +import hashlib +import platform +from multiprocessing import Process + +from django.db import models, IntegrityError, transaction +from django.db import close_connection +from django.contrib.contenttypes import generic +from django.utils.translation import ugettext_lazy as _ +from django.utils.translation import ugettext +from django.utils.simplejson import loads, dumps + +from common.models import Singleton +from .literals import (JOB_STATE_CHOICES, JOB_STATE_PENDING, + JOB_STATE_PROCESSING, JOB_STATE_ERROR, WORKER_STATE_CHOICES, + WORKER_STATE_RUNNING) +from .exceptions import JobQueuePushError, JobQueueNoPendingJobs +#from .exceptions import (WorkerAlreadyDisabled, WorkerAlreadyEnabled) + +job_queue_labels = {} +job_types_registry = {} + + +class Job(object): + def __init__(self, function, job_queue_item): + close_connection() + # Run sync or launch async subprocess + # OR launch 2 processes: monitor & actual process + node = Node.objects.get_myself() + worker = Worker.objects.create(node=node, name=u'%s-%d' % (node.hostname, os.getpid())) + try: + close_connection() + transaction.commit_on_success(function)(**loads(job_queue_item.kwargs)) + #function(**loads(job_queue_item.kwargs)) + except Exception, exc: + close_connection() + transaction.rollback() + close_connection() + def set_state_error(): + job_queue_item.result = exc + job_queue_item.state = JOB_STATE_ERROR + job_queue_item.save() + transaction.commit_on_success(set_state_error)() + else: + job_queue_item.delete() + finally: + worker.delete() + + +class JobType(object): + def __init__(self, name, label, function): + self.name = name + self.label = label + self.function = function + job_types_registry[self.name] = self + + def run(self, job_queue_item, **kwargs): + job_queue_item.state = JOB_STATE_PROCESSING + job_queue_item.save() + p = Process(target=Job, args=(self.function, job_queue_item,)) + p.start() + #p.join() + + +class NodeManager(models.Manager): + 
def get_myself(self): + return self.model.objects.get(hostname=platform.node()) + + +class Node(models.Model): + hostname = models.CharField(max_length=255, verbose_name=_(u'hostname')) + cpuload = models.PositiveIntegerField(blank=True, default=0, verbose_name=_(u'cpu load')) + heartbeat = models.DateTimeField(blank=True, default=datetime.datetime.now, verbose_name=_(u'last heartbeat check')) + memory_usage = models.FloatField(blank=True, verbose_name=_(u'memory usage')) + + objects = NodeManager() + + def __unicode__(self): + return self.hostname + + def save(self, *args, **kwargs): + self.heartbeat = datetime.datetime.now() + return super(Node, self).save(*args, **kwargs) + + class Meta: + verbose_name = _(u'node') + verbose_name_plural = _(u'nodes') + + +class JobQueueManager(models.Manager): + def get_or_create(self, *args, **kwargs): + job_queue_labels[kwargs.get('name')] = kwargs.get('defaults', {}).get('label') + return super(JobQueueManager, self).get_or_create(*args, **kwargs) + + +class JobQueue(models.Model): + # TODO: support for stopping and starting job queues + # Internal name + name = models.CharField(max_length=32, verbose_name=_(u'name'), unique=True) + unique_jobs = models.BooleanField(verbose_name=_(u'unique jobs'), default=True) + + objects = JobQueueManager() + + def __unicode__(self): + return unicode(self.label or self.name) + + @property + def label(self): + return job_queue_labels.get(self.name) + + def push(self, job_type, **kwargs): # TODO: add replace flag + job_queue_item = JobQueueItem(job_queue=self, job_type=job_type.name, kwargs=dumps(kwargs)) + job_queue_item.save() + return job_queue_item + + #def pull(self): + # queue_item_qs = JobQueueItem.objects.filter(queue=self).order_by('-creation_datetime') + # if queue_item_qs: + # queue_item = queue_item_qs[0] + # queue_item.delete() + # return loads(queue_item.data) + + def get_oldest_pending_job(self): + try: + return self.pending_jobs.all().order_by('-creation_datetime')[0] + 
except IndexError: + raise JobQueueNoPendingJobs + + @property + def pending_jobs(self): + return self.items.filter(state=JOB_STATE_PENDING) + + @property + def items(self): + return self.jobqueueitem_set + + def empty(self): + self.items.all().delete() + + def save(self, *args, **kwargs): + label = getattr(self, 'label', None) + if label: + job_queue_labels[self.name] = label + return super(JobQueue, self).save(*args, **kwargs) + + # TODO: custom runtime methods + + class Meta: + verbose_name = _(u'job queue') + verbose_name_plural = _(u'job queues') + + +class JobQueueItem(models.Model): + # TODO: add re-queue + job_queue = models.ForeignKey(JobQueue, verbose_name=_(u'job queue')) + creation_datetime = models.DateTimeField(verbose_name=_(u'creation datetime'), editable=False) + unique_id = models.CharField(blank=True, max_length=64, verbose_name=_(u'id'), unique=True, editable=False) + job_type = models.CharField(max_length=32, verbose_name=_(u'job type')) + kwargs = models.TextField(verbose_name=_(u'keyword arguments')) + state = models.CharField(max_length=4, + choices=JOB_STATE_CHOICES, + default=JOB_STATE_PENDING, + verbose_name=_(u'state')) + result = models.TextField(blank=True, verbose_name=_(u'result')) + + def __unicode__(self): + return self.unique_id + + def save(self, *args, **kwargs): + self.creation_datetime = datetime.datetime.now() + + if self.job_queue.unique_jobs: + self.unique_id = hashlib.sha256(u'%s-%s' % (self.job_type, self.kwargs)).hexdigest() + else: + self.unique_id = unicode(uuid.uuid4()) + try: + super(JobQueueItem, self).save(*args, **kwargs) + except IntegrityError: + # TODO: Maybe replace instead of rasining exception w/ replace flag + raise JobQueuePushError + + def run(self): + job_type_instance = job_types_registry.get(self.job_type) + job_type_instance.run(self) + + class Meta: + ordering = ('creation_datetime',) + verbose_name = _(u'job queue item') + verbose_name_plural = _(u'job queue items') + + +class Worker(models.Model): 
+ node = models.ForeignKey(Node, verbose_name=_(u'node')) + name = models.CharField(max_length=255, verbose_name=_(u'name')) + creation_datetime = models.DateTimeField(verbose_name=_(u'creation datetime'), default=lambda: datetime.datetime.now(), editable=False) + heartbeat = models.DateTimeField(blank=True, default=datetime.datetime.now(), verbose_name=_(u'heartbeat check')) + state = models.CharField(max_length=4, + choices=WORKER_STATE_CHOICES, + default=WORKER_STATE_RUNNING, + verbose_name=_(u'state')) + + #def disable(self): + # if self.state == WORKER_STATE_DISABLED: + # raise WorkerAlreadyDisabled + # + # self.state = WORKER_STATE_DISABLED + # self.save() + # + #def enable(self): + # if self.state == WORKER_STATE_ENABLED: + # raise WorkerAlreadyEnabled + # + # self.state = WORKER_STATE_ENABLED + # self.save() + # + #def is_enabled(self): + # return self.state == WORKER_STATE_ENABLED + + class Meta: + ordering = ('creation_datetime',) + verbose_name = _(u'worker') + verbose_name_plural = _(u'workers') + +""" +class JobProcessingConfig(Singleton): + worker_time_to_live = models.PositiveInteger(verbose_name=(u'time to live (in seconds)') # After this time a worker is considered dead + worker_heartbeat_interval = models.PositiveInteger(verbose_name=(u'heartbeat interval') + node_heartbeat_interval = models.PositiveInteger(verbose_name=(u'heartbeat interval') + + def __unicode__(self): + return ugettext('Workers configuration') + + class Meta: + verbose_name = verbose_name_plural = _(u'Workers configuration') +""" diff --git a/apps/job_processor/tasks.py b/apps/job_processor/tasks.py new file mode 100644 index 0000000000..7117c49a53 --- /dev/null +++ b/apps/job_processor/tasks.py @@ -0,0 +1,58 @@ +from __future__ import absolute_import + +from datetime import timedelta, datetime +import platform +import logging +import psutil + +from lock_manager import Lock, LockError + +from .models import Node, JobQueue +from .exceptions import JobQueueNoPendingJobs + 
+LOCK_EXPIRE = 10 +# TODO: Tie LOCK_EXPIRATION with hard task timeout + +logger = logging.getLogger(__name__) + + +def refresh_node(): + logger.debug('starting') + + lock_id = u'refresh_node' + try: + logger.debug('trying to acquire lock: %s' % lock_id) + lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE) + logger.debug('acquired lock: %s' % lock_id) + node, created = Node.objects.get_or_create(hostname=platform.node(), defaults={'memory_usage': 0.0}) + node.cpuload = psutil.cpu_percent() + node.memory_usage = psutil.phymem_usage().percent + node.save() + lock.release() + except LockError: + logger.debug('unable to obtain lock') + except Exception: + lock.release() + raise + + +def job_queue_poll(): + logger.debug('starting') + + lock_id = u'job_queue_poll' + try: + logger.debug('trying to acquire lock: %s' % lock_id) + lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE) + logger.debug('acquired lock: %s' % lock_id) + for job_queue in JobQueue.objects.all(): + try: + job_item = job_queue.get_oldest_pending_job() + job_item.run() + except JobQueueNoPendingJobs: + logger.debug('no pending jobs for job queue: %s' % job_queue) + lock.release() + except LockError: + logger.debug('unable to obtain lock') + except Exception: + lock.release() + raise diff --git a/apps/lock_manager/models.py b/apps/lock_manager/models.py index a70af8230c..89b49e5881 100644 --- a/apps/lock_manager/models.py +++ b/apps/lock_manager/models.py @@ -2,7 +2,8 @@ from __future__ import absolute_import -from django.db import models +from django.db import close_connection +from django.db import models, transaction, DatabaseError from django.utils.translation import ugettext_lazy as _ from .managers import LockManager @@ -26,13 +27,17 @@ class Lock(models.Model): super(Lock, self).save(*args, **kwargs) + @transaction.commit_on_success def release(self): + close_connection() try: lock = Lock.objects.get(name=self.name, creation_datetime=self.creation_datetime) lock.delete() except Lock.DoesNotExist: # Out lock 
expired and was reassigned pass + except DatabaseError: + transaction.rollback() class Meta: verbose_name = _(u'lock') diff --git a/apps/ocr/__init__.py b/apps/ocr/__init__.py index f824a23e9d..7ea9921e14 100644 --- a/apps/ocr/__init__.py +++ b/apps/ocr/__init__.py @@ -17,11 +17,11 @@ from project_tools.api import register_tool from acls.api import class_permissions from scheduler.api import register_interval_job from statistics.api import register_statistics -from queue_manager.models import Queue +from job_processor.models import JobQueue, JobType from .conf.settings import (AUTOMATIC_OCR, QUEUE_PROCESSING_INTERVAL) from .models import OCRProcessingSingleton -from .tasks import task_process_document_queues +from .api import do_document_ocr from .permissions import PERMISSION_OCR_DOCUMENT from .exceptions import AlreadyQueued from . import models as ocr_models @@ -29,6 +29,7 @@ from .statistics import get_statistics from .literals import OCR_QUEUE_NAME logger = logging.getLogger(__name__) +ocr_job_queue = None from .links import (submit_document, re_queue_multiple_document, queue_document_multiple_delete, ocr_disable, @@ -37,21 +38,17 @@ from .links import (submit_document, re_queue_multiple_document, bind_links([Document], [submit_document]) bind_links([OCRProcessingSingleton], [ocr_disable, ocr_enable]) -#bind_links([QueueTransformation], [setup_queue_transformation_edit, setup_queue_transformation_delete]) - #register_multi_item_links(['queue_document_list'], [re_queue_multiple_document, queue_document_multiple_delete]) -#bind_links(['setup_queue_transformation_create', 'setup_queue_transformation_edit', 'setup_queue_transformation_delete', 'document_queue_disable', 'document_queue_enable', 'queue_document_list', 'setup_queue_transformation_list'], [queue_document_list], menu_name='secondary_menu') -#bind_links(['setup_queue_transformation_edit', 'setup_queue_transformation_delete', 'setup_queue_transformation_list', 'setup_queue_transformation_create'], 
[setup_queue_transformation_create], menu_name='sidebar') - register_maintenance_links([all_document_ocr_cleanup], namespace='ocr', title=_(u'OCR')) -#register_multi_item_links(['folder_view', 'search', 'results', 'index_instance_node_view', 'document_find_duplicates', 'document_type_document_list', 'document_group_view', 'document_list', 'document_list_recent'], [submit_document_multiple]) +register_multi_item_links(['folder_view', 'search', 'results', 'index_instance_node_view', 'document_find_duplicates', 'document_type_document_list', 'document_group_view', 'document_list', 'document_list_recent'], [submit_document_multiple]) @transaction.commit_on_success -def create_ocr_queue(): +def create_ocr_job_queue(): + global ocr_job_queue try: - queue, created = Queue.objects.get_or_create(name=OCR_QUEUE_NAME, defaults={'label': _('OCR'), 'unique_names': True}) + ocr_job_queue, created = JobQueue.objects.get_or_create(name=OCR_QUEUE_NAME, defaults={'label': _('OCR'), 'unique_jobs': True}) except DatabaseError: transaction.rollback() @@ -76,12 +73,6 @@ def document_post_save(sender, instance, **kwargs): # logger.debug('got call_queue signal: %s' % kwargs) # task_process_document_queues() - -#@receiver(post_syncdb, dispatch_uid='create_ocr_queue_on_syncdb', sender=ocr_models) -#def create_ocr_queue_on_syncdb(sender, **kwargs): - -#register_interval_job('task_process_document_queues', _(u'Checks the OCR queue for pending documents.'), task_process_document_queues, seconds=QUEUE_PROCESSING_INTERVAL) - register_tool(ocr_tool_link) class_permissions(Document, [ @@ -89,4 +80,5 @@ class_permissions(Document, [ ]) #register_statistics(get_statistics) -create_ocr_queue() +create_ocr_job_queue() +ocr_job_type = JobType('ocr', _(u'OCR'), do_document_ocr) diff --git a/apps/ocr/admin.py b/apps/ocr/admin.py deleted file mode 100644 index 1689ad7a52..0000000000 --- a/apps/ocr/admin.py +++ /dev/null @@ -1,22 +0,0 @@ -""" -from __future__ import absolute_import - -from django.contrib 
import admin - -from .models import DocumentQueue, QueueDocument - - -class QueueDocumentInline(admin.StackedInline): - model = QueueDocument - extra = 1 - classes = ('collapse-open',) - allow_add = True - - -class DocumentQueueAdmin(admin.ModelAdmin): - inlines = [QueueDocumentInline] - list_display = ('name', 'label', 'state') - - -admin.site.register(DocumentQueue, DocumentQueueAdmin) -""" diff --git a/apps/ocr/api.py b/apps/ocr/api.py index 2cc3fad6f4..5af659a4b3 100644 --- a/apps/ocr/api.py +++ b/apps/ocr/api.py @@ -12,7 +12,7 @@ from django.utils.importlib import import_module from common.conf.settings import TEMPORARY_DIRECTORY from converter.api import convert -from documents.models import DocumentPage +from documents.models import DocumentPage, DocumentVersion from .conf.settings import (TESSERACT_PATH, TESSERACT_LANGUAGE, UNPAPER_PATH) from .exceptions import TesseractError, UnpaperError @@ -81,25 +81,25 @@ def run_tesseract(input_filename, lang=None): return text -def do_document_ocr(queue_document): +def do_document_ocr(document_version_pk): """ Try first to extract text from document pages using the registered parser, if the parser fails or if there is no parser registered for the document mimetype do a visual OCR by calling tesseract """ - for document_page in queue_document.document_version.pages.all(): + document_version = DocumentVersion.objects.get(pk=document_version_pk) + for document_page in document_version.pages.all(): try: # Try to extract text by means of a parser parse_document_page(document_page) except (ParserError, ParserUnknownFile): # Fall back to doing visual OCR - ocr_transformations, warnings = queue_document.get_transformation_list() document_filepath = document_page.document.get_image_cache_name(page=document_page.page_number, version=document_page.document_version.pk) unpaper_output_filename = u'%s_unpaper_out_page_%s%s%s' % (document_page.document.uuid, document_page.page_number, os.extsep, UNPAPER_FILE_FORMAT) 
unpaper_output_filepath = os.path.join(TEMPORARY_DIRECTORY, unpaper_output_filename) - unpaper_input = convert(document_filepath, file_format=UNPAPER_FILE_FORMAT, transformations=ocr_transformations) + unpaper_input = convert(document_filepath, file_format=UNPAPER_FILE_FORMAT) execute_unpaper(input_filepath=unpaper_input, output_filepath=unpaper_output_filepath) #from PIL import Image, ImageOps diff --git a/apps/ocr/forms.py b/apps/ocr/forms.py deleted file mode 100644 index 0fde716bbb..0000000000 --- a/apps/ocr/forms.py +++ /dev/null @@ -1,23 +0,0 @@ -""" -from __future__ import absolute_import - -from django import forms - -from .models import QueueTransformation - - -class QueueTransformationForm(forms.ModelForm): - class Meta: - model = QueueTransformation - - def __init__(self, *args, **kwargs): - super(QueueTransformationForm, self).__init__(*args, **kwargs) - self.fields['content_type'].widget = forms.HiddenInput() - self.fields['object_id'].widget = forms.HiddenInput() - - -class QueueTransformationForm_create(forms.ModelForm): - class Meta: - model = QueueTransformation - exclude = ('content_type', 'object_id') -""" diff --git a/apps/ocr/links.py b/apps/ocr/links.py index 5c708ae4b2..84a01e8238 100644 --- a/apps/ocr/links.py +++ b/apps/ocr/links.py @@ -30,8 +30,3 @@ queue_document_multiple_delete = Link(text=_(u'delete'), view='queue_document_mu all_document_ocr_cleanup = Link(text=_(u'clean up pages content'), view='all_document_ocr_cleanup', sprite='text_strikethrough', permissions=[PERMISSION_OCR_CLEAN_ALL_PAGES], description=_(u'Runs a language filter to remove common OCR mistakes from document pages content.')) ocr_tool_link = Link(text=_(u'OCR'), view='ocr_log', sprite='hourglass', icon='text.png', permissions=[PERMISSION_OCR_DOCUMENT]) # children_view_regex=[r'queue_', r'document_queue']) - -#setup_queue_transformation_list = Link(text=_(u'transformations'), view='setup_queue_transformation_list', args='queue.pk', sprite='shape_move_front') 
-#setup_queue_transformation_create = Link(text=_(u'add transformation'), view='setup_queue_transformation_create', args='queue.pk', sprite='shape_square_add') -#setup_queue_transformation_edit = Link(text=_(u'edit'), view='setup_queue_transformation_edit', args='transformation.pk', sprite='shape_square_edit') -#setup_queue_transformation_delete = Link(text=_(u'delete'), view='setup_queue_transformation_delete', args='transformation.pk', sprite='shape_square_delete') diff --git a/apps/ocr/literals.py b/apps/ocr/literals.py index 761cd017d5..b7d10f8615 100644 --- a/apps/ocr/literals.py +++ b/apps/ocr/literals.py @@ -9,17 +9,6 @@ OCR_STATE_CHOICES = ( (OCR_STATE_ENABLED, _(u'enabled')), ) - -#QUEUEDOCUMENT_STATE_PENDING = 'p' -#QUEUEDOCUMENT_STATE_PROCESSING = 'i' -#QUEUEDOCUMENT_STATE_ERROR = 'e' - -#QUEUEDOCUMENT_STATE_CHOICES = ( -# (QUEUEDOCUMENT_STATE_PENDING, _(u'pending')), -# (QUEUEDOCUMENT_STATE_PROCESSING, _(u'processing')), -# (QUEUEDOCUMENT_STATE_ERROR, _(u'error')), -#) - DEFAULT_OCR_FILE_FORMAT = u'tiff' DEFAULT_OCR_FILE_EXTENSION = u'tif' UNPAPER_FILE_FORMAT = u'ppm' diff --git a/apps/ocr/models.py b/apps/ocr/models.py index 3cecf15951..9898db7060 100644 --- a/apps/ocr/models.py +++ b/apps/ocr/models.py @@ -53,146 +53,3 @@ class OCRProcessingSingleton(Singleton): class Meta: verbose_name = verbose_name_plural = _(u'OCR processing properties') - -""" -class OCRLog(models.Model): - #queue = models.ForeignKey(Queue, verbose_name=_(u'queue')) - document_version = models.ForeignKey(DocumentVersion, verbose_name=_(u'document version')) - datetime = models.DateTimeField(verbose_name=_(u'date time'), default=lambda: datetime.datetime.now(), db_index=True) - delay = models.BooleanField(verbose_name=_(u'delay OCR'), default=False) - #state = models.CharField(max_length=4, - # choices=QUEUEDOCUMENT_STATE_CHOICES, - # default=QUEUEDOCUMENT_STATE_PENDING, - # verbose_name=_(u'state')) - result = models.TextField(blank=True, null=True, verbose_name=_(u'result')) - 
#node_name = models.CharField(max_length=32, verbose_name=_(u'node name'), blank=True, null=True) - - class Meta: - ordering = ('datetime',) - verbose_name = _(u'OCR log entry') - verbose_name_plural = _(u'OCR log entries') - - #def get_transformation_list(self): - # return QueueTransformation.transformations.get_for_object_as_list(self) - - def requeue(self): - pass - #if self.state == QUEUEDOCUMENT_STATE_PROCESSING: - # raise ReQueueError - #else: - # self.datetime_submitted = datetime.now() - # self.state = QUEUEDOCUMENT_STATE_PENDING - # self.delay = False - # self.result = None - # self.node_name = None - # self.save() - - def __unicode__(self): - try: - return unicode(self.document) - except ObjectDoesNotExist: - return ugettext(u'Missing document.') -""" - -#class DocumentQueue(models.Model): -# name = models.CharField(max_length=64, unique=True, verbose_name=_(u'name')) -# label = models.CharField(max_length=64, verbose_name=_(u'label')) -# state = models.CharField(max_length=4, -# choices=DOCUMENTQUEUE_STATE_CHOICES, -# default=DOCUMENTQUEUE_STATE_ACTIVE, -# verbose_name=_(u'state')) -# -# objects = DocumentQueueManager()# -# -# class Meta: -# verbose_name = _(u'document queue') -# verbose_name_plural = _(u'document queues')# -# -# def __unicode__(self): -# return self.label - - - -""" -class QueueDocument(models.Model): - document_queue = models.ForeignKey(DocumentQueue, verbose_name=_(u'document queue')) - document = models.ForeignKey(Document, verbose_name=_(u'document')) - datetime_submitted = models.DateTimeField(verbose_name=_(u'date time submitted'), auto_now_add=True, db_index=True) - delay = models.BooleanField(verbose_name=_(u'delay ocr'), default=False) - state = models.CharField(max_length=4, - choices=QUEUEDOCUMENT_STATE_CHOICES, - default=QUEUEDOCUMENT_STATE_PENDING, - verbose_name=_(u'state')) - result = models.TextField(blank=True, null=True, verbose_name=_(u'result')) - node_name = models.CharField(max_length=32, verbose_name=_(u'node 
name'), blank=True, null=True) - - class Meta: - ordering = ('datetime_submitted',) - verbose_name = _(u'queue document') - verbose_name_plural = _(u'queue documents') - - def get_transformation_list(self): - return QueueTransformation.transformations.get_for_object_as_list(self) - - def requeue(self): - if self.state == QUEUEDOCUMENT_STATE_PROCESSING: - raise ReQueueError - else: - self.datetime_submitted = datetime.now() - self.state = QUEUEDOCUMENT_STATE_PENDING - self.delay = False - self.result = None - self.node_name = None - self.save() - - def __unicode__(self): - try: - return unicode(self.document) - except ObjectDoesNotExist: - return ugettext(u'Missing document.') - - -class ArgumentsValidator(object): - message = _(u'Enter a valid value.') - code = 'invalid' - - def __init__(self, message=None, code=None): - if message is not None: - self.message = message - if code is not None: - self.code = code - - def __call__(self, value): - ''' - Validates that the input evaluates correctly. 
- ''' - value = value.strip() - try: - literal_eval(value) - except (ValueError, SyntaxError): - raise ValidationError(self.message, code=self.code) - - -class QueueTransformation(models.Model): - ''' - Model that stores the transformation and transformation arguments - for a given document queue - ''' - content_type = models.ForeignKey(ContentType) - object_id = models.PositiveIntegerField() - content_object = generic.GenericForeignKey('content_type', 'object_id') - order = models.PositiveIntegerField(default=0, blank=True, null=True, verbose_name=_(u'order'), db_index=True) - transformation = models.CharField(choices=get_available_transformations_choices(), max_length=128, verbose_name=_(u'transformation')) - arguments = models.TextField(blank=True, null=True, verbose_name=_(u'arguments'), help_text=_(u'Use dictionaries to indentify arguments, example: %s') % u'{\'degrees\':90}', validators=[ArgumentsValidator()]) - - objects = models.Manager() - transformations = SourceTransformationManager() - - def __unicode__(self): - return self.get_transformation_display() - - class Meta: - ordering = ('order',) - verbose_name = _(u'document queue transformation') - verbose_name_plural = _(u'document queue transformations') -""" diff --git a/apps/ocr/tasks.py b/apps/ocr/tasks.py deleted file mode 100644 index 9780c8b2a8..0000000000 --- a/apps/ocr/tasks.py +++ /dev/null @@ -1,75 +0,0 @@ -from __future__ import absolute_import - -from datetime import timedelta, datetime -import platform -import logging - -from django.db.models import Q - -from job_processor.api import process_job -from lock_manager import Lock, LockError - -from .api import do_document_ocr -#from .literals import (QUEUEDOCUMENT_STATE_PENDING, -# QUEUEDOCUMENT_STATE_PROCESSING, DOCUMENTQUEUE_STATE_ACTIVE, -# QUEUEDOCUMENT_STATE_ERROR) -#from .models import QueueDocument, DocumentQueue -from .conf.settings import NODE_CONCURRENT_EXECUTION, REPLICATION_DELAY - -LOCK_EXPIRE = 60 * 10 # Lock expires in 10 minutes 
-# TODO: Tie LOCK_EXPIRATION with hard task timeout - -logger = logging.getLogger(__name__) - - -def task_process_queue_document(queue_document_id): - lock_id = u'task_proc_queue_doc-%d' % queue_document_id - try: - logger.debug('trying to acquire lock: %s' % lock_id) - lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE) - logger.debug('acquired lock: %s' % lock_id) - queue_document = QueueDocument.objects.get(pk=queue_document_id) - queue_document.state = QUEUEDOCUMENT_STATE_PROCESSING - queue_document.node_name = platform.node() - queue_document.save() - try: - do_document_ocr(queue_document) - queue_document.delete() - except Exception, e: - queue_document.state = QUEUEDOCUMENT_STATE_ERROR - queue_document.result = e - queue_document.save() - - lock.release() - except LockError: - logger.debug('unable to obtain lock') - pass - - -def task_process_document_queues(): - logger.debug('executed') - # TODO: reset_orphans() - q_pending = Q(state=QUEUEDOCUMENT_STATE_PENDING) - q_delayed = Q(delay=True) - q_delay_interval = Q(datetime_submitted__lt=datetime.now() - timedelta(seconds=REPLICATION_DELAY)) - for document_queue in DocumentQueue.objects.filter(state=DOCUMENTQUEUE_STATE_ACTIVE): - current_local_processing_count = QueueDocument.objects.filter( - state=QUEUEDOCUMENT_STATE_PROCESSING).filter( - node_name=platform.node()).count() - if current_local_processing_count < NODE_CONCURRENT_EXECUTION: - try: - oldest_queued_document_qs = document_queue.queuedocument_set.filter( - (q_pending & ~q_delayed) | (q_pending & q_delayed & q_delay_interval)) - - if oldest_queued_document_qs: - oldest_queued_document = oldest_queued_document_qs.order_by('datetime_submitted')[0] - process_job(task_process_queue_document, oldest_queued_document.pk) - except Exception, e: - logger.error('unhandled exception: %s' % e) - finally: - # Don't process anymore from this queryset, might be stale - break - else: - logger.debug('already processing maximun') - else: - logger.debug('nothing to 
process') diff --git a/apps/ocr/views.py b/apps/ocr/views.py index d4dcb109d6..1c019be7ac 100644 --- a/apps/ocr/views.py +++ b/apps/ocr/views.py @@ -14,17 +14,16 @@ from documents.models import Document from documents.widgets import document_link, document_thumbnail from common.utils import encapsulate from acls.models import AccessEntry +from job_processor.exceptions import JobQueuePushError from .permissions import (PERMISSION_OCR_DOCUMENT, PERMISSION_OCR_DOCUMENT_DELETE, PERMISSION_OCR_QUEUE_ENABLE_DISABLE, PERMISSION_OCR_CLEAN_ALL_PAGES, PERMISSION_OCR_QUEUE_EDIT) from .models import OCRProcessingSingleton -#from .literals import (QUEUEDOCUMENT_STATE_PROCESSING, -# DOCUMENTQUEUE_STATE_ACTIVE, DOCUMENTQUEUE_STATE_STOPPED) from .exceptions import (AlreadyQueued, ReQueueError, OCRProcessingAlreadyDisabled, OCRProcessingAlreadyEnabled) from .api import clean_pages -#from .forms import QueueTransformationForm, QueueTransformationForm_create +from . import ocr_job_queue, ocr_job_type def ocr_log(request): @@ -195,15 +194,15 @@ def submit_document(request, document_id): def submit_document_to_queue(request, document, post_submit_redirect=None): - ''' + """ This view is meant to be reusable - ''' + """ try: - document_queue = DocumentQueue.objects.queue_document(document) - messages.success(request, _(u'Document: %(document)s was added to the OCR queue: %(queue)s.') % { - 'document': document, 'queue': document_queue.label}) - except AlreadyQueued: + ocr_job_queue.push(ocr_job_type, document_version_pk=document.latest_version.pk) + messages.success(request, _(u'Document: %(document)s was added to the OCR queue sucessfully.') % { + 'document': document}) + except JobQueuePushError: messages.warning(request, _(u'Document: %(document)s is already queued.') % { 'document': document}) except Exception, e: diff --git a/apps/queue_manager/__init__.py b/apps/queue_manager/__init__.py deleted file mode 100755 index 5b40c4e8ec..0000000000 --- a/apps/queue_manager/__init__.py +++ 
/dev/null @@ -1,20 +0,0 @@ -from queue_manager.models import Queue as QueueModel, QueuePushError - -class Queue(object): - @classmethod - def __new__(cls, name, queue_name, label=None, unique_names=False): - name = queue_name - if not label: - label=u'' - queue, created = QueueModel.objects.get_or_create( - name=name, - defaults={ - 'label': label, - 'unique_names': unique_names - } - ) - if not created: - queue.label = label - queue.unique_names = unique_names - queue.save() - return queue diff --git a/apps/queue_manager/admin.py b/apps/queue_manager/admin.py deleted file mode 100755 index 14bb1dcb74..0000000000 --- a/apps/queue_manager/admin.py +++ /dev/null @@ -1,22 +0,0 @@ -from django.contrib import admin - -from django.utils.translation import ugettext_lazy as _ - -from queue_manager.models import Queue, QueueItem - - -class QueueItemInline(admin.StackedInline): - model = QueueItem - - -class QueueAdmin(admin.ModelAdmin): - model = Queue - list_display = ('name', 'label', 'total_items') - inlines = [QueueItemInline] - - def total_items(self, obj): - return obj.items.all().count() - total_items.short_description = _(u'total items') - - -admin.site.register(Queue, QueueAdmin) diff --git a/apps/queue_manager/exceptions.py b/apps/queue_manager/exceptions.py deleted file mode 100644 index fc356e3b69..0000000000 --- a/apps/queue_manager/exceptions.py +++ /dev/null @@ -1,6 +0,0 @@ -class QueueException(Exception): - pass - - -class QueuePushError(QueueException): - pass diff --git a/apps/queue_manager/migrations/0001_initial.py b/apps/queue_manager/migrations/0001_initial.py deleted file mode 100644 index 22215e726d..0000000000 --- a/apps/queue_manager/migrations/0001_initial.py +++ /dev/null @@ -1,57 +0,0 @@ -# -*- coding: utf-8 -*- -import datetime -from south.db import db -from south.v2 import SchemaMigration -from django.db import models - - -class Migration(SchemaMigration): - - def forwards(self, orm): - # Adding model 'Queue' - 
db.create_table('queue_manager_queue', ( - ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), - ('name', self.gf('django.db.models.fields.CharField')(unique=True, max_length=32)), - ('unique_names', self.gf('django.db.models.fields.BooleanField')(default=False)), - )) - db.send_create_signal('queue_manager', ['Queue']) - - # Adding model 'QueueItem' - db.create_table('queue_manager_queueitem', ( - ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), - ('queue', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['queue_manager.Queue'])), - ('creation_datetime', self.gf('django.db.models.fields.DateTimeField')()), - ('unique_name', self.gf('django.db.models.fields.CharField')(unique=True, max_length=32, blank=True)), - ('name', self.gf('django.db.models.fields.CharField')(max_length=32, blank=True)), - ('data', self.gf('django.db.models.fields.TextField')()), - )) - db.send_create_signal('queue_manager', ['QueueItem']) - - - def backwards(self, orm): - # Deleting model 'Queue' - db.delete_table('queue_manager_queue') - - # Deleting model 'QueueItem' - db.delete_table('queue_manager_queueitem') - - - models = { - 'queue_manager.queue': { - 'Meta': {'object_name': 'Queue'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32'}), - 'unique_names': ('django.db.models.fields.BooleanField', [], {'default': 'False'}) - }, - 'queue_manager.queueitem': { - 'Meta': {'object_name': 'QueueItem'}, - 'creation_datetime': ('django.db.models.fields.DateTimeField', [], {}), - 'data': ('django.db.models.fields.TextField', [], {}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'name': ('django.db.models.fields.CharField', [], {'max_length': '32', 'blank': 'True'}), - 'queue': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['queue_manager.Queue']"}), - 'unique_name': 
('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32', 'blank': 'True'}) - } - } - - complete_apps = ['queue_manager'] \ No newline at end of file diff --git a/apps/queue_manager/migrations/__init__.py b/apps/queue_manager/migrations/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/apps/queue_manager/models.py b/apps/queue_manager/models.py deleted file mode 100755 index e82781723b..0000000000 --- a/apps/queue_manager/models.py +++ /dev/null @@ -1,95 +0,0 @@ -from __future__ import absolute_import - -from datetime import datetime - -from django.db import models -from django.utils.translation import ugettext_lazy as _ -from django.utils.simplejson import loads, dumps -from django.db import IntegrityError - -from .exceptions import QueuePushError - -queue_labels = {} - - -class QueueManager(models.Manager): - def get_or_create(self, *args, **kwargs): - queue_labels[kwargs.get('name')] = kwargs.get('defaults', {}).get('label') - return super(QueueManager, self).get_or_create(*args, **kwargs) - - -class Queue(models.Model): - # Internal name - name = models.CharField(max_length=32, verbose_name=_(u'name'), unique=True) - unique_names = models.BooleanField(verbose_name=_(u'unique names'), default=False) - - objects = QueueManager() - - def __unicode__(self): - return unicode(self.label) or self.name - - @property - def label(self): - return queue_labels.get(self.name) - - def push(self, data, name=None): # TODO: add replace flag - if not name: - name = u'' - queue_item = QueueItem(queue=self, name=name, data=dumps(data)) - queue_item.save() - return queue_item - - def pull(self): - queue_item_qs = QueueItem.objects.filter(queue=self).order_by('-creation_datetime') - if queue_item_qs: - queue_item = queue_item_qs[0] - queue_item.delete() - return loads(queue_item.data) - - @property - def items(self): - return self.queueitem_set - - def empty(self): - self.items.all().delete() - - def save(self, *args, **kwargs): - 
label = getattr(self, 'label', None) - if label: - queue_labels[self.name] = label - return super(Queue, self).save(*args, **kwargs) - - # TODO: custom runtime methods - - class Meta: - verbose_name = _(u'queue') - verbose_name_plural = _(u'queues') - - -class QueueItem(models.Model): - queue = models.ForeignKey(Queue, verbose_name=_(u'queue')) - creation_datetime = models.DateTimeField(verbose_name=_(u'creation datetime'), editable=False) - unique_name = models.CharField(blank=True, max_length=32, verbose_name=_(u'name'), unique=True, editable=False) - name = models.CharField(blank=True, max_length=32, verbose_name=_(u'name')) - data = models.TextField(verbose_name=_(u'data')) - - def __unicode__(self): - return self.name - - def save(self, *args, **kwargs): - self.creation_datetime = datetime.now() - - if self.queue.unique_names: - self.unique_name = self.name - else: - self.unique_name = unicode(self.creation_datetime) - try: - super(QueueItem, self).save(*args, **kwargs) - except IntegrityError: - # TODO: Maybe replace instead or rasining exception w/ replace flag - raise QueuePushError - - class Meta: - verbose_name = _(u'queue item') - verbose_name_plural = _(u'queue items') - diff --git a/apps/queue_manager/views.py b/apps/queue_manager/views.py deleted file mode 100755 index 60f00ef0ef..0000000000 --- a/apps/queue_manager/views.py +++ /dev/null @@ -1 +0,0 @@ -# Create your views here. 
diff --git a/apps/scheduler/__init__.py b/apps/scheduler/__init__.py index 18d7ea6e9f..e9e693b024 100644 --- a/apps/scheduler/__init__.py +++ b/apps/scheduler/__init__.py @@ -17,6 +17,9 @@ from .links import job_list logger = logging.getLogger(__name__) + +# TODO: shutdown scheduler on pre_syncdb to avoid accessing non existing models + @receiver(post_syncdb, dispatch_uid='scheduler_shutdown_post_syncdb') def scheduler_shutdown_post_syncdb(sender, **kwargs): logger.debug('Scheduler shut down on post syncdb signal') diff --git a/settings.py b/settings.py index e9d25d00c7..c627ac33f6 100644 --- a/settings.py +++ b/settings.py @@ -154,7 +154,6 @@ INSTALLED_APPS = ( 'navigation', 'lock_manager', 'web_theme', - 'queue_manager', # pagination needs to go after web_theme so that the pagination template is found 'pagination', 'common', @@ -188,7 +187,7 @@ INSTALLED_APPS = ( 'workflows', 'checkouts', 'rest_api', - 'bootstrap', + #'bootstrap', 'statistics', # Has to be last so the other apps can register it's signals From e0abb02df0471cc682a99819710bae62352fc19a Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 09:22:37 -0400 Subject: [PATCH 07/40] Improve Node.objects.myself to do a refresh --- apps/job_processor/models.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/apps/job_processor/models.py b/apps/job_processor/models.py index 90859397a6..254ea0e7fa 100644 --- a/apps/job_processor/models.py +++ b/apps/job_processor/models.py @@ -30,7 +30,7 @@ class Job(object): close_connection() # Run sync or launch async subprocess # OR launch 2 processes: monitor & actual process - node = Node.objects.get_myself() + node = Node.objects.myself() worker = Worker.objects.create(node=node, name=u'%s-%d' % (node.hostname, os.getpid())) try: close_connection() @@ -63,12 +63,12 @@ class JobType(object): job_queue_item.save() p = Process(target=Job, args=(self.function, job_queue_item,)) p.start() - #p.join() class 
NodeManager(models.Manager): - def get_myself(self): - return self.model.objects.get(hostname=platform.node()) + def myself(self): + node = self.model.objects.get_or_create(hostname=platform.node(), defaults={'memory_usage': 100}) + node.refresh() class Node(models.Model): @@ -81,6 +81,11 @@ class Node(models.Model): def __unicode__(self): return self.hostname + + def refresh(self): + node.cpuload = psutil.cpu_percent() + node.memory_usage = psutil.phymem_usage().percent + node.save() def save(self, *args, **kwargs): self.heartbeat = datetime.datetime.now() From 3762f90adaabc9128487f77742876fc50f352df3 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 09:23:10 -0400 Subject: [PATCH 08/40] Use the new .myself method of Node --- apps/job_processor/tasks.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/apps/job_processor/tasks.py b/apps/job_processor/tasks.py index 7117c49a53..f3e1d04b1f 100644 --- a/apps/job_processor/tasks.py +++ b/apps/job_processor/tasks.py @@ -24,10 +24,7 @@ def refresh_node(): logger.debug('trying to acquire lock: %s' % lock_id) lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE) logger.debug('acquired lock: %s' % lock_id) - node, created = Node.objects.get_or_create(hostname=platform.node(), defaults={'memory_usage': 0.0}) - node.cpuload = psutil.cpu_percent() - node.memory_usage = psutil.phymem_usage().percent - node.save() + node = Node.objects.myself() # Automatically calls the refresh() method too lock.release() except LockError: logger.debug('unable to obtain lock') From 28f9f32d91bb045f0ef7b852d55a1f0523fa309e Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 10:54:43 -0400 Subject: [PATCH 09/40] Add decorator helper for simple locks --- apps/lock_manager/decorators.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 apps/lock_manager/decorators.py diff --git a/apps/lock_manager/decorators.py b/apps/lock_manager/decorators.py new file mode 
100644 index 0000000000..c98a7ae1c9 --- /dev/null +++ b/apps/lock_manager/decorators.py @@ -0,0 +1,33 @@ +from __future__ import absolute_import + +from functools import wraps + +from . import logger +from . import Lock +from .exceptions import LockError + + +def simple_locking(lock_id, expiration=None): + """ + A decorator that wraps a function in a single lock getting algorithm + """ + def inner_decorator(function): + def wrapper(*args, **kwargs): + try: + # Trying to acquire lock + lock = Lock.acquire_lock(lock_id, expiration) + except LockError: + # Unable to acquire lock + pass + except Exception: + # Unhandled error, release lock + lock.release() + raise + else: + # Lock acquired, proceed normally, release lock afterwards + logger.debug('acquired lock: %s' % lock_id) + result = function(*args, **kwargs) + lock.release() + return result + return wraps(function)(wrapper) + return inner_decorator From ba2f2fc455b4560e48f7140dd1e601d68e8a2d85 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 10:55:05 -0400 Subject: [PATCH 10/40] Add database migrations for the job processor app --- apps/job_processor/migrations/0001_initial.py | 105 ++++++++++++++++++ apps/job_processor/migrations/__init__.py | 0 2 files changed, 105 insertions(+) create mode 100644 apps/job_processor/migrations/0001_initial.py create mode 100644 apps/job_processor/migrations/__init__.py diff --git a/apps/job_processor/migrations/0001_initial.py b/apps/job_processor/migrations/0001_initial.py new file mode 100644 index 0000000000..771b740999 --- /dev/null +++ b/apps/job_processor/migrations/0001_initial.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- +import datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + # Adding model 'Node' + db.create_table('job_processor_node', ( + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('hostname', 
self.gf('django.db.models.fields.CharField')(max_length=255)), + ('cpuload', self.gf('django.db.models.fields.PositiveIntegerField')(default=0, blank=True)), + ('heartbeat', self.gf('django.db.models.fields.DateTimeField')(default=datetime.datetime(2012, 7, 30, 0, 0), blank=True)), + ('memory_usage', self.gf('django.db.models.fields.FloatField')(blank=True)), + )) + db.send_create_signal('job_processor', ['Node']) + + # Adding model 'JobQueue' + db.create_table('job_processor_jobqueue', ( + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('name', self.gf('django.db.models.fields.CharField')(unique=True, max_length=32)), + ('unique_jobs', self.gf('django.db.models.fields.BooleanField')(default=True)), + )) + db.send_create_signal('job_processor', ['JobQueue']) + + # Adding model 'JobQueueItem' + db.create_table('job_processor_jobqueueitem', ( + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('job_queue', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['job_processor.JobQueue'])), + ('creation_datetime', self.gf('django.db.models.fields.DateTimeField')()), + ('unique_id', self.gf('django.db.models.fields.CharField')(unique=True, max_length=64, blank=True)), + ('job_type', self.gf('django.db.models.fields.CharField')(max_length=32)), + ('kwargs', self.gf('django.db.models.fields.TextField')()), + ('state', self.gf('django.db.models.fields.CharField')(default='p', max_length=4)), + ('result', self.gf('django.db.models.fields.TextField')(blank=True)), + )) + db.send_create_signal('job_processor', ['JobQueueItem']) + + # Adding model 'Worker' + db.create_table('job_processor_worker', ( + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('node', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['job_processor.Node'])), + ('name', self.gf('django.db.models.fields.CharField')(max_length=255)), + ('creation_datetime', 
self.gf('django.db.models.fields.DateTimeField')(default=datetime.datetime(2012, 7, 30, 0, 0))), + ('heartbeat', self.gf('django.db.models.fields.DateTimeField')(default=datetime.datetime(2012, 7, 30, 0, 0), blank=True)), + ('state', self.gf('django.db.models.fields.CharField')(default='r', max_length=4)), + )) + db.send_create_signal('job_processor', ['Worker']) + + + def backwards(self, orm): + # Deleting model 'Node' + db.delete_table('job_processor_node') + + # Deleting model 'JobQueue' + db.delete_table('job_processor_jobqueue') + + # Deleting model 'JobQueueItem' + db.delete_table('job_processor_jobqueueitem') + + # Deleting model 'Worker' + db.delete_table('job_processor_worker') + + + models = { + 'job_processor.jobqueue': { + 'Meta': {'object_name': 'JobQueue'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32'}), + 'unique_jobs': ('django.db.models.fields.BooleanField', [], {'default': 'True'}) + }, + 'job_processor.jobqueueitem': { + 'Meta': {'ordering': "('creation_datetime',)", 'object_name': 'JobQueueItem'}, + 'creation_datetime': ('django.db.models.fields.DateTimeField', [], {}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'job_queue': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['job_processor.JobQueue']"}), + 'job_type': ('django.db.models.fields.CharField', [], {'max_length': '32'}), + 'kwargs': ('django.db.models.fields.TextField', [], {}), + 'result': ('django.db.models.fields.TextField', [], {'blank': 'True'}), + 'state': ('django.db.models.fields.CharField', [], {'default': "'p'", 'max_length': '4'}), + 'unique_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64', 'blank': 'True'}) + }, + 'job_processor.node': { + 'Meta': {'object_name': 'Node'}, + 'cpuload': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0', 'blank': 'True'}), 
+ 'heartbeat': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime(2012, 7, 30, 0, 0)', 'blank': 'True'}), + 'hostname': ('django.db.models.fields.CharField', [], {'max_length': '255'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'memory_usage': ('django.db.models.fields.FloatField', [], {'blank': 'True'}) + }, + 'job_processor.worker': { + 'Meta': {'ordering': "('creation_datetime',)", 'object_name': 'Worker'}, + 'creation_datetime': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime(2012, 7, 30, 0, 0)'}), + 'heartbeat': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime(2012, 7, 30, 0, 0)', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '255'}), + 'node': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['job_processor.Node']"}), + 'state': ('django.db.models.fields.CharField', [], {'default': "'r'", 'max_length': '4'}) + } + } + + complete_apps = ['job_processor'] \ No newline at end of file diff --git a/apps/job_processor/migrations/__init__.py b/apps/job_processor/migrations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 From 96a1b682218dcc3d9b4d0acd1c77f96ecfdd0d23 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 10:55:32 -0400 Subject: [PATCH 11/40] Fix some simple typos --- apps/job_processor/models.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/apps/job_processor/models.py b/apps/job_processor/models.py index 254ea0e7fa..fdd4b9784e 100644 --- a/apps/job_processor/models.py +++ b/apps/job_processor/models.py @@ -7,6 +7,8 @@ import hashlib import platform from multiprocessing import Process +import psutil + from django.db import models, IntegrityError, transaction from django.db import close_connection from django.contrib.contenttypes import generic @@ 
-67,8 +69,9 @@ class JobType(object): class NodeManager(models.Manager): def myself(self): - node = self.model.objects.get_or_create(hostname=platform.node(), defaults={'memory_usage': 100}) + node, created = self.model.objects.get_or_create(hostname=platform.node(), defaults={'memory_usage': 100}) node.refresh() + return node class Node(models.Model): @@ -83,9 +86,9 @@ class Node(models.Model): return self.hostname def refresh(self): - node.cpuload = psutil.cpu_percent() - node.memory_usage = psutil.phymem_usage().percent - node.save() + self.cpuload = psutil.cpu_percent() + self.memory_usage = psutil.phymem_usage().percent + self.save() def save(self, *args, **kwargs): self.heartbeat = datetime.datetime.now() From 198538df4f311dc981fc358ca4220779569b2f24 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 10:56:12 -0400 Subject: [PATCH 12/40] Use the new simple lock decorator to simplify a job processor task --- apps/job_processor/tasks.py | 58 ++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 33 deletions(-) diff --git a/apps/job_processor/tasks.py b/apps/job_processor/tasks.py index f3e1d04b1f..4fa131ff7d 100644 --- a/apps/job_processor/tasks.py +++ b/apps/job_processor/tasks.py @@ -1,55 +1,47 @@ from __future__ import absolute_import -from datetime import timedelta, datetime -import platform import logging -import psutil from lock_manager import Lock, LockError +from lock_manager.decorators import simple_locking from .models import Node, JobQueue from .exceptions import JobQueueNoPendingJobs LOCK_EXPIRE = 10 # TODO: Tie LOCK_EXPIRATION with hard task timeout +MAX_CPU_LOAD = 90 +MAX_MEMORY_USAGE = 90 logger = logging.getLogger(__name__) +@simple_locking('refresh_node', 10) def refresh_node(): logger.debug('starting') - - lock_id = u'refresh_node' - try: - logger.debug('trying to acquire lock: %s' % lock_id) - lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE) - logger.debug('acquired lock: %s' % lock_id) - node = 
Node.objects.myself() # Automatically calls the refresh() method too - lock.release() - except LockError: - logger.debug('unable to obtain lock') - except Exception: - lock.release() - raise + node = Node.objects.myself() # Automatically calls the refresh() method too def job_queue_poll(): logger.debug('starting') - lock_id = u'job_queue_poll' - try: - logger.debug('trying to acquire lock: %s' % lock_id) - lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE) - logger.debug('acquired lock: %s' % lock_id) - for job_queue in JobQueue.objects.all(): - try: - job_item = job_queue.get_oldest_pending_job() - job_item.run() - except JobQueueNoPendingJobs: - logger.debug('no pending jobs for job queue: %s' % job_queue) - lock.release() - except LockError: - logger.debug('unable to obtain lock') - except Exception: - lock.release() - raise + node = Node.objects.myself() # Automatically calls the refresh() method too + if node.cpuload < MAX_CPU_LOAD and node.memory_usage < MAX_MEMORY_USAGE: + # Poll job queues is node is not overloaded + lock_id = u'job_queue_poll' + try: + lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE) + except LockError: + pass + except Exception: + lock.release() + raise + else: + for job_queue in JobQueue.objects.all(): + try: + job_item = job_queue.get_oldest_pending_job() + job_item.run() + except JobQueueNoPendingJobs: + logger.debug('no pending jobs for job queue: %s' % job_queue) + lock.release() + From 5b5b6fd785af816a0716b975c3a679c09473512a Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 10:56:57 -0400 Subject: [PATCH 13/40] Improve logging --- apps/lock_manager/managers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/lock_manager/managers.py b/apps/lock_manager/managers.py index 99145e8c5a..4096c5e85b 100644 --- a/apps/lock_manager/managers.py +++ b/apps/lock_manager/managers.py @@ -31,7 +31,7 @@ class LockManager(models.Manager): except self.model.DoesNotExist: # Table based locking logger.debug('lock: 
%s does not exist' % name) - raise LockError('Unable to acquire lock') + raise LockError('unable to acquire lock: %s' % name) if datetime.datetime.now() > lock.creation_datetime + datetime.timedelta(seconds=lock.timeout): logger.debug('reseting deleting stale lock: %s' % name) From 243e5c01faededae02acdf8cd8155a42e558acad Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 10:57:16 -0400 Subject: [PATCH 14/40] Add app level loggin used by the new decorator --- apps/lock_manager/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/apps/lock_manager/__init__.py b/apps/lock_manager/__init__.py index 61fe7fbf78..886e3fb0f8 100644 --- a/apps/lock_manager/__init__.py +++ b/apps/lock_manager/__init__.py @@ -1,6 +1,10 @@ from __future__ import absolute_import +import logging + from .exceptions import LockError from .models import Lock as LockModel +logger = logging.getLogger(__name__) + Lock = LockModel.objects From a574aa2c9fca469821b57d4200458d6c751ad144 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 10:58:22 -0400 Subject: [PATCH 15/40] Add missing import --- apps/lock_manager/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/lock_manager/models.py b/apps/lock_manager/models.py index 89b49e5881..d7d47815bd 100644 --- a/apps/lock_manager/models.py +++ b/apps/lock_manager/models.py @@ -3,7 +3,7 @@ from __future__ import absolute_import import datetime from django.db import close_connection -from django.db import models, transaction +from django.db import (models, transaction, DatabaseError) from django.utils.translation import ugettext_lazy as _ from .managers import LockManager @@ -34,7 +34,7 @@ class Lock(models.Model): lock = Lock.objects.get(name=self.name, creation_datetime=self.creation_datetime) lock.delete() except Lock.DoesNotExist: - # Out lock expired and was reassigned + # Lock expired and was reassigned pass except DatabaseError: transaction.rollback() From 
86144aa54a8fea66365c1b9ba049f31ccc664d19 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 10:58:40 -0400 Subject: [PATCH 16/40] Update upgrade instructions --- docs/releases/0.13.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/releases/0.13.rst b/docs/releases/0.13.rst index 68afd7b86f..79f73a2b69 100644 --- a/docs/releases/0.13.rst +++ b/docs/releases/0.13.rst @@ -78,6 +78,7 @@ Afterwards migrate existing database schema with:: $ ./manage.py migrate tags 0001 --fake $ ./manage.py migrate linking 0001 --fake $ ./manage.py migrate lock_manager 0001 --fake + $ ./manage.py migrate job_processor Issue the following command to index existing documents in the new full text search database:: From 75f9c47e2bd73faf4b7453545e6bc9101806ff9b Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 12:54:27 -0400 Subject: [PATCH 17/40] Add clustering app --- apps/clustering/__init__.py | 18 ++++++ apps/clustering/admin.py | 13 +++++ apps/clustering/links.py | 10 ++++ apps/clustering/migrations/0001_initial.py | 38 ++++++++++++ apps/clustering/migrations/__init__.py | 0 apps/clustering/models.py | 44 ++++++++++++++ apps/clustering/permissions.py | 8 +++ apps/clustering/tasks.py | 17 ++++++ apps/clustering/urls.py | 6 ++ apps/clustering/views.py | 67 ++++++++++++++++++++++ settings.py | 1 + urls.py | 2 + 12 files changed, 224 insertions(+) create mode 100644 apps/clustering/__init__.py create mode 100644 apps/clustering/admin.py create mode 100644 apps/clustering/links.py create mode 100644 apps/clustering/migrations/0001_initial.py create mode 100644 apps/clustering/migrations/__init__.py create mode 100644 apps/clustering/models.py create mode 100644 apps/clustering/permissions.py create mode 100644 apps/clustering/tasks.py create mode 100644 apps/clustering/urls.py create mode 100644 apps/clustering/views.py diff --git a/apps/clustering/__init__.py b/apps/clustering/__init__.py new file mode 100644 index 0000000000..4149c12347 --- 
/dev/null +++ b/apps/clustering/__init__.py @@ -0,0 +1,18 @@ +from __future__ import absolute_import + +from django.utils.translation import ugettext_lazy as _ + +from scheduler.api import register_interval_job +from navigation.api import bind_links +from project_tools.api import register_tool + +from .tasks import refresh_node +from .links import tool_link, node_list +from .models import Node + +NODE_REFRESH_INTERVAL = 1 + +register_interval_job('refresh_node', _(u'Update a node\'s properties.'), refresh_node, seconds=NODE_REFRESH_INTERVAL) + +register_tool(tool_link) +bind_links([Node, 'node_list'], [node_list], menu_name='secondary_menu') diff --git a/apps/clustering/admin.py b/apps/clustering/admin.py new file mode 100644 index 0000000000..ad92b0a549 --- /dev/null +++ b/apps/clustering/admin.py @@ -0,0 +1,13 @@ +from __future__ import absolute_import + +from django.contrib import admin +from django.utils.translation import ugettext_lazy as _ + +from .models import Node + + +class NodeAdmin(admin.ModelAdmin): + list_display = ('hostname', 'cpuload', 'heartbeat', 'memory_usage') + + +admin.site.register(Node, NodeAdmin) diff --git a/apps/clustering/links.py b/apps/clustering/links.py new file mode 100644 index 0000000000..606103a6de --- /dev/null +++ b/apps/clustering/links.py @@ -0,0 +1,10 @@ +from __future__ import absolute_import + +from django.utils.translation import ugettext_lazy as _ + +from navigation.api import Link + +from .permissions import (PERMISSION_NODES_VIEW) + +tool_link = Link(text=_(u'clustering'), view='node_list', icon='server.png', permissions=[PERMISSION_NODES_VIEW]) # children_view_regex=[r'^index_setup', r'^template_node']) +node_list = Link(text=_(u'node list'), view='node_list', sprite='server', permissions=[PERMISSION_NODES_VIEW]) diff --git a/apps/clustering/migrations/0001_initial.py b/apps/clustering/migrations/0001_initial.py new file mode 100644 index 0000000000..7932f6ade5 --- /dev/null +++ 
b/apps/clustering/migrations/0001_initial.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +import datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + # Adding model 'Node' + db.create_table('clustering_node', ( + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('hostname', self.gf('django.db.models.fields.CharField')(max_length=255)), + ('cpuload', self.gf('django.db.models.fields.PositiveIntegerField')(default=0, blank=True)), + ('heartbeat', self.gf('django.db.models.fields.DateTimeField')(default=datetime.datetime(2012, 7, 30, 0, 0), blank=True)), + ('memory_usage', self.gf('django.db.models.fields.FloatField')(blank=True)), + )) + db.send_create_signal('clustering', ['Node']) + + + def backwards(self, orm): + # Deleting model 'Node' + db.delete_table('clustering_node') + + + models = { + 'clustering.node': { + 'Meta': {'object_name': 'Node'}, + 'cpuload': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0', 'blank': 'True'}), + 'heartbeat': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime(2012, 7, 30, 0, 0)', 'blank': 'True'}), + 'hostname': ('django.db.models.fields.CharField', [], {'max_length': '255'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'memory_usage': ('django.db.models.fields.FloatField', [], {'blank': 'True'}) + } + } + + complete_apps = ['clustering'] \ No newline at end of file diff --git a/apps/clustering/migrations/__init__.py b/apps/clustering/migrations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/apps/clustering/models.py b/apps/clustering/models.py new file mode 100644 index 0000000000..67ded6cf04 --- /dev/null +++ b/apps/clustering/models.py @@ -0,0 +1,44 @@ +from __future__ import absolute_import + +import os +import datetime +import platform + +import psutil + +from django.db import 
models, IntegrityError, transaction +from django.db import close_connection +from django.utils.translation import ugettext_lazy as _ +from django.utils.translation import ugettext + + +class NodeManager(models.Manager): + def myself(self): + node, created = self.model.objects.get_or_create(hostname=platform.node(), defaults={'memory_usage': 100}) + node.refresh() + return node + + +class Node(models.Model): + hostname = models.CharField(max_length=255, verbose_name=_(u'hostname')) + cpuload = models.PositiveIntegerField(blank=True, default=0, verbose_name=_(u'cpu load')) + heartbeat = models.DateTimeField(blank=True, default=datetime.datetime.now(), verbose_name=_(u'last heartbeat check')) + memory_usage = models.FloatField(blank=True, verbose_name=_(u'memory usage')) + + objects = NodeManager() + + def __unicode__(self): + return self.hostname + + def refresh(self): + self.cpuload = psutil.cpu_percent() + self.memory_usage = psutil.phymem_usage().percent + self.save() + + def save(self, *args, **kwargs): + self.heartbeat = datetime.datetime.now() + return super(Node, self).save(*args, **kwargs) + + class Meta: + verbose_name = _(u'node') + verbose_name_plural = _(u'nodes') diff --git a/apps/clustering/permissions.py b/apps/clustering/permissions.py new file mode 100644 index 0000000000..6065936140 --- /dev/null +++ b/apps/clustering/permissions.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import + +from django.utils.translation import ugettext_lazy as _ + +from permissions.models import PermissionNamespace, Permission + +namespace = PermissionNamespace('clustering', _(u'Clustering')) +PERMISSION_NODES_VIEW = Permission.objects.register(namespace, 'nodes_view', _(u'View the nodes in a Mayan cluster')) diff --git a/apps/clustering/tasks.py b/apps/clustering/tasks.py new file mode 100644 index 0000000000..aa9f01b53e --- /dev/null +++ b/apps/clustering/tasks.py @@ -0,0 +1,17 @@ +from __future__ import absolute_import + +import logging + +from 
lock_manager.decorators import simple_locking + +from .models import Node + +LOCK_EXPIRE = 10 + +logger = logging.getLogger(__name__) + + +@simple_locking('refresh_node', 10) +def refresh_node(): + logger.debug('starting') + node = Node.objects.myself() # Automatically calls the refresh() method too diff --git a/apps/clustering/urls.py b/apps/clustering/urls.py new file mode 100644 index 0000000000..e43cf0041d --- /dev/null +++ b/apps/clustering/urls.py @@ -0,0 +1,6 @@ +from django.conf.urls.defaults import patterns, url + + +urlpatterns = patterns('clustering.views', + url(r'^node/list/$', 'node_list', (), 'node_list'), +) diff --git a/apps/clustering/views.py b/apps/clustering/views.py new file mode 100644 index 0000000000..efaafe1ad5 --- /dev/null +++ b/apps/clustering/views.py @@ -0,0 +1,67 @@ +from __future__ import absolute_import + +from django.shortcuts import render_to_response +from django.template import RequestContext +from django.utils.translation import ugettext_lazy as _ +from django.shortcuts import get_object_or_404 +from django.db.models.loading import get_model +from django.http import Http404 +from django.core.exceptions import PermissionDenied + +from permissions.models import Permission +from common.utils import encapsulate +from acls.models import AccessEntry + +from .models import Node +from .permissions import PERMISSION_NODES_VIEW + + +def node_list(request): + Permission.objects.check_permissions(request.user, [PERMISSION_NODES_VIEW]) + + context = { + 'object_list': Node.objects.all(), + 'title': _(u'nodes'), + 'extra_columns': [ + { + 'name': _(u'hostname'), + 'attribute': 'hostname', + }, + { + 'name': _(u'cpu load'), + 'attribute': 'cpuload', + }, + { + 'name': _(u'heartbeat'), + 'attribute': 'heartbeat', + }, + { + 'name': _(u'memory usage'), + 'attribute': 'memory_usage', + }, + + ], + 'hide_object': True, + } + + return render_to_response('generic_list.html', context, + context_instance=RequestContext(request)) + + +def 
node_workers(request, node_pk): + node = get_object_or_404(Node, pk=node_pk) + + try: + Permission.objects.check_permissions(request.user, [PERMISSION_NODES_VIEW]) + except PermissionDenied: + AccessEntry.objects.check_access(PERMISSION_NODES_VIEW, request.user, node) + + context = { + 'object_list': node.workers.all(), + 'title': _(u'workers for node: %s') % node, + 'object': node, + 'hide_object': True, + } + + return render_to_response('generic_list.html', context, + context_instance=RequestContext(request)) diff --git a/settings.py b/settings.py index c627ac33f6..aa9dc1e814 100644 --- a/settings.py +++ b/settings.py @@ -162,6 +162,7 @@ INSTALLED_APPS = ( 'converter', 'user_management', 'mimetype', + 'clustering', 'scheduler', 'job_processor', # Mayan EDMS diff --git a/urls.py b/urls.py index 4a5ebe61ec..82b10cc5d1 100644 --- a/urls.py +++ b/urls.py @@ -36,10 +36,12 @@ urlpatterns = patterns('', (r'^checkouts/', include('checkouts.urls')), (r'^installation/', include('installation.urls')), (r'^scheduler/', include('scheduler.urls')), + (r'^job_processing/', include('job_processor.urls')), (r'^bootstrap/', include('bootstrap.urls')), (r'^diagnostics/', include('diagnostics.urls')), (r'^maintenance/', include('maintenance.urls')), (r'^statistics/', include('statistics.urls')), + (r'^clustering/', include('clustering.urls')), ) From 30eb32f8886ea358592b6e5ac8221c5d4c28e7f0 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 12:55:53 -0400 Subject: [PATCH 18/40] Job processing app updates --- apps/job_processor/__init__.py | 22 ++++++++++++++++--- apps/job_processor/admin.py | 13 +---------- apps/job_processor/links.py | 15 +++++++++++++ apps/job_processor/models.py | 34 ++--------------------------- apps/job_processor/permissions.py | 8 +++++++ apps/job_processor/tasks.py | 13 +++-------- apps/job_processor/urls.py | 15 +++++++++++++ apps/job_processor/views.py | 36 ++++++++++++++++++++++++++++++- 8 files changed, 98 insertions(+), 58 deletions(-) 
create mode 100644 apps/job_processor/links.py create mode 100644 apps/job_processor/permissions.py create mode 100644 apps/job_processor/urls.py diff --git a/apps/job_processor/__init__.py b/apps/job_processor/__init__.py index 0d06afc514..8bbd215612 100644 --- a/apps/job_processor/__init__.py +++ b/apps/job_processor/__init__.py @@ -3,11 +3,27 @@ from __future__ import absolute_import from django.utils.translation import ugettext_lazy as _ from scheduler.api import register_interval_job +from navigation.api import bind_links, register_model_list_columns +from project_tools.api import register_tool +from common.utils import encapsulate -from .tasks import refresh_node, job_queue_poll +from .tasks import job_queue_poll +from .links import node_workers +from clustering.models import Node -NODE_REFRESH_INTERVAL = 1 JOB_QUEUE_POLL_INTERVAL = 1 -register_interval_job('refresh_node', _(u'Update a node\'s properties.'), refresh_node, seconds=NODE_REFRESH_INTERVAL) register_interval_job('job_queue_poll', _(u'Poll a job queue for pending jobs.'), job_queue_poll, seconds=JOB_QUEUE_POLL_INTERVAL) + +#register_tool(tool_link) +#bind_links([Node, 'node_list'], [node_list], menu_name='secondary_menu') +bind_links([Node], [node_workers]) + +Node.add_to_class('workers', lambda node: node.worker_set) + +register_model_list_columns(Node, [ + { + 'name': _(u'total workers'), + 'attribute': encapsulate(lambda x: x.workers().all().count()) + }, +]) diff --git a/apps/job_processor/admin.py b/apps/job_processor/admin.py index 29d0535763..4739a8df26 100644 --- a/apps/job_processor/admin.py +++ b/apps/job_processor/admin.py @@ -3,17 +3,7 @@ from __future__ import absolute_import from django.contrib import admin from django.utils.translation import ugettext_lazy as _ -from .models import Node, JobQueue, JobQueueItem, Worker - - -class WorkerInline(admin.StackedInline): - list_display = ('name', 'creation_datetime', 'state') - model = Worker - - -class NodeAdmin(admin.ModelAdmin): - 
list_display = ('hostname', 'cpuload', 'heartbeat', 'memory_usage') - inlines = [WorkerInline] +from .models import JobQueue, JobQueueItem class JobQueueItemInline(admin.StackedInline): @@ -30,5 +20,4 @@ class JobQueueAdmin(admin.ModelAdmin): total_items.short_description = _(u'total items') -admin.site.register(Node, NodeAdmin) admin.site.register(JobQueue, JobQueueAdmin) diff --git a/apps/job_processor/links.py b/apps/job_processor/links.py new file mode 100644 index 0000000000..5d6dbb44cc --- /dev/null +++ b/apps/job_processor/links.py @@ -0,0 +1,15 @@ +from __future__ import absolute_import + +from django.utils.translation import ugettext_lazy as _ + +from navigation.api import Link + +from clustering.permissions import (PERMISSION_NODES_VIEW) + + +node_workers = Link(text=_(u'workers'), view='node_workers', args='object.pk', sprite='lorry_go', permissions=[PERMISSION_NODES_VIEW]) +#index_setup_create = Link(text=_(u'create index'), view='index_setup_create', sprite='tab_add', permissions=[PERMISSION_DOCUMENT_INDEXING_CREATE]) +#index_setup_edit = Link(text=_(u'edit'), view='index_setup_edit', args='index.pk', sprite='tab_edit', permissions=[PERMISSION_DOCUMENT_INDEXING_EDIT]) +#index_setup_delete = Link(text=_(u'delete'), view='index_setup_delete', args='index.pk', sprite='tab_delete', permissions=[PERMISSION_DOCUMENT_INDEXING_DELETE]) +#index_setup_view = Link(text=_(u'tree template'), view='index_setup_view', args='index.pk', sprite='textfield', permissions=[PERMISSION_DOCUMENT_INDEXING_SETUP]) +#index_setup_document_types = Link(text=_(u'document types'), view='index_setup_document_types', args='index.pk', sprite='layout', permissions=[PERMISSION_DOCUMENT_INDEXING_EDIT]) # children_view_regex=[r'^index_setup', r'^template_node']) diff --git a/apps/job_processor/models.py b/apps/job_processor/models.py index fdd4b9784e..279c22c87f 100644 --- a/apps/job_processor/models.py +++ b/apps/job_processor/models.py @@ -17,6 +17,8 @@ from django.utils.translation 
import ugettext from django.utils.simplejson import loads, dumps from common.models import Singleton +from clustering.models import Node + from .literals import (JOB_STATE_CHOICES, JOB_STATE_PENDING, JOB_STATE_PROCESSING, JOB_STATE_ERROR, WORKER_STATE_CHOICES, WORKER_STATE_RUNNING) @@ -67,38 +69,6 @@ class JobType(object): p.start() -class NodeManager(models.Manager): - def myself(self): - node, created = self.model.objects.get_or_create(hostname=platform.node(), defaults={'memory_usage': 100}) - node.refresh() - return node - - -class Node(models.Model): - hostname = models.CharField(max_length=255, verbose_name=_(u'hostname')) - cpuload = models.PositiveIntegerField(blank=True, default=0, verbose_name=_(u'cpu load')) - heartbeat = models.DateTimeField(blank=True, default=datetime.datetime.now(), verbose_name=_(u'last heartbeat check')) - memory_usage = models.FloatField(blank=True, verbose_name=_(u'memory usage')) - - objects = NodeManager() - - def __unicode__(self): - return self.hostname - - def refresh(self): - self.cpuload = psutil.cpu_percent() - self.memory_usage = psutil.phymem_usage().percent - self.save() - - def save(self, *args, **kwargs): - self.heartbeat = datetime.datetime.now() - return super(Node, self).save(*args, **kwargs) - - class Meta: - verbose_name = _(u'node') - verbose_name_plural = _(u'nodes') - - class JobQueueManager(models.Manager): def get_or_create(self, *args, **kwargs): job_queue_labels[kwargs.get('name')] = kwargs.get('defaults', {}).get('label') diff --git a/apps/job_processor/permissions.py b/apps/job_processor/permissions.py new file mode 100644 index 0000000000..4b5988c48d --- /dev/null +++ b/apps/job_processor/permissions.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import + +from django.utils.translation import ugettext_lazy as _ + +from permissions.models import PermissionNamespace, Permission + +namespace = PermissionNamespace('job_processor', _(u'Job processor')) +#PERMISSION_NODES_VIEW = 
Permission.objects.register(namespace, 'nodes_view', _(u'View the registeres nodes in a Mayan cluster')) diff --git a/apps/job_processor/tasks.py b/apps/job_processor/tasks.py index 4fa131ff7d..226d841a3a 100644 --- a/apps/job_processor/tasks.py +++ b/apps/job_processor/tasks.py @@ -3,31 +3,24 @@ from __future__ import absolute_import import logging from lock_manager import Lock, LockError -from lock_manager.decorators import simple_locking +from clustering.models import Node -from .models import Node, JobQueue +from .models import JobQueue from .exceptions import JobQueueNoPendingJobs LOCK_EXPIRE = 10 -# TODO: Tie LOCK_EXPIRATION with hard task timeout MAX_CPU_LOAD = 90 MAX_MEMORY_USAGE = 90 logger = logging.getLogger(__name__) -@simple_locking('refresh_node', 10) -def refresh_node(): - logger.debug('starting') - node = Node.objects.myself() # Automatically calls the refresh() method too - - def job_queue_poll(): logger.debug('starting') node = Node.objects.myself() # Automatically calls the refresh() method too if node.cpuload < MAX_CPU_LOAD and node.memory_usage < MAX_MEMORY_USAGE: - # Poll job queues is node is not overloaded + # Poll job queues if node is not overloaded lock_id = u'job_queue_poll' try: lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE) diff --git a/apps/job_processor/urls.py b/apps/job_processor/urls.py new file mode 100644 index 0000000000..572d8ce103 --- /dev/null +++ b/apps/job_processor/urls.py @@ -0,0 +1,15 @@ +from django.conf.urls.defaults import patterns, url + + +urlpatterns = patterns('job_processor.views', + #url(r'^node/list/$', 'node_list', (), 'node_list'), + url(r'^node/(?P\d+)/workers/$', 'node_workers', (), 'node_workers'), + #url(r'^create/$', 'folder_create', (), 'folder_create'), + #url(r'^(?P\d+)/edit/$', 'folder_edit', (), 'folder_edit'), + #url(r'^(?P\d+)/delete/$', 'folder_delete', (), 'folder_delete'), + #url(r'^(?P\d+)/$', 'folder_view', (), 'folder_view'), + #url(r'^(?P\d+)/remove/document/multiple/$', 
'folder_document_multiple_remove', (), 'folder_document_multiple_remove'), + #url(r'^document/(?P\d+)/folder/add/$', 'folder_add_document', (), 'folder_add_document'), + #url(r'^document/(?P\d+)/folder/list/$', 'document_folder_list', (), 'document_folder_list'), + #url(r'^(?P\d+)/acl/list/$', 'folder_acl_list', (), 'folder_acl_list'), +) diff --git a/apps/job_processor/views.py b/apps/job_processor/views.py index 60f00ef0ef..2936620c60 100644 --- a/apps/job_processor/views.py +++ b/apps/job_processor/views.py @@ -1 +1,35 @@ -# Create your views here. +from __future__ import absolute_import + +from django.shortcuts import render_to_response +from django.template import RequestContext +from django.utils.translation import ugettext_lazy as _ +from django.shortcuts import get_object_or_404 +from django.contrib.contenttypes.models import ContentType +from django.db.models.loading import get_model +from django.http import Http404 +from django.core.exceptions import PermissionDenied + +from permissions.models import Permission +from common.utils import encapsulate +from acls.models import AccessEntry +from clustering.permissions import PERMISSION_NODES_VIEW +from clustering.models import Node + + +def node_workers(request, node_pk): + node = get_object_or_404(Node, pk=node_pk) + + try: + Permission.objects.check_permissions(request.user, [PERMISSION_NODES_VIEW]) + except PermissionDenied: + AccessEntry.objects.check_access(PERMISSION_NODES_VIEW, request.user, node) + + context = { + 'object_list': node.workers().all(), + 'title': _(u'workers for node: %s') % node, + 'object': node, + 'hide_object': True, + } + + return render_to_response('generic_list.html', context, + context_instance=RequestContext(request)) From a7437a3ada90798a847da7e251beb07db2401653 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 12:56:12 -0400 Subject: [PATCH 19/40] Remove further code from the ocr app --- apps/ocr/__init__.py | 8 ++- apps/ocr/links.py | 4 +- apps/ocr/urls.py 
| 14 ++--- apps/ocr/views.py | 126 +------------------------------------------ 4 files changed, 14 insertions(+), 138 deletions(-) diff --git a/apps/ocr/__init__.py b/apps/ocr/__init__.py index 7ea9921e14..bfcb1187c0 100644 --- a/apps/ocr/__init__.py +++ b/apps/ocr/__init__.py @@ -18,6 +18,7 @@ from acls.api import class_permissions from scheduler.api import register_interval_job from statistics.api import register_statistics from job_processor.models import JobQueue, JobType +from job_processor.exceptions import JobQueuePushError from .conf.settings import (AUTOMATIC_OCR, QUEUE_PROCESSING_INTERVAL) from .models import OCRProcessingSingleton @@ -60,8 +61,8 @@ def document_post_save(sender, instance, **kwargs): if kwargs.get('created', False): if AUTOMATIC_OCR: try: - DocumentQueue.objects.queue_document(instance.document) - except AlreadyQueued: + instance.submit_for_ocr() + except JobQueuePushError: pass # Disabled because it appears Django execute signals using the same @@ -82,3 +83,6 @@ class_permissions(Document, [ #register_statistics(get_statistics) create_ocr_job_queue() ocr_job_type = JobType('ocr', _(u'OCR'), do_document_ocr) + +Document.add_to_class('submit_for_ocr', lambda document: ocr_job_queue.push(ocr_job_type, document_version_pk=document.pk)) +DocumentVersion.add_to_class('submit_for_ocr', lambda document_version: ocr_job_queue.push(ocr_job_type, document_version_pk=document.latest_version.pk)) diff --git a/apps/ocr/links.py b/apps/ocr/links.py index 84a01e8238..b3d34171f7 100644 --- a/apps/ocr/links.py +++ b/apps/ocr/links.py @@ -19,8 +19,8 @@ def is_disabled(context): ocr_log = Link(text=_(u'queue document list'), view='ocr_log', sprite='text', permissions=[PERMISSION_OCR_DOCUMENT]) ocr_disable = Link(text=_(u'disable OCR processing'), view='ocr_disable', sprite='control_stop_blue', permissions=[PERMISSION_OCR_QUEUE_ENABLE_DISABLE], conditional_disable=is_disabled) ocr_enable = Link(text=_(u'enable OCR processing'), view='ocr_enable', 
sprite='control_play_blue', permissions=[PERMISSION_OCR_QUEUE_ENABLE_DISABLE], conditional_disable=is_enabled) -submit_document = Link(text=_('submit to OCR queue'), view='submit_document', args='object.id', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT]) -submit_document_multiple = Link(text=_('submit to OCR queue'), view='submit_document_multiple', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT]) +submit_document = Link(text=_('submit to OCR queue'), view='submit_document', args='object.id', sprite='text_dropcaps', permissions=[PERMISSION_OCR_DOCUMENT]) +submit_document_multiple = Link(text=_('submit to OCR queue'), view='submit_document_multiple', sprite='text_dropcaps', permissions=[PERMISSION_OCR_DOCUMENT]) re_queue_document = Link(text=_('re-queue'), view='re_queue_document', args='object.id', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT]) re_queue_multiple_document = Link(text=_('re-queue'), view='re_queue_multiple_document', sprite='hourglass_add', permissions=[PERMISSION_OCR_DOCUMENT]) queue_document_delete = Link(text=_(u'delete'), view='queue_document_delete', args='object.id', sprite='hourglass_delete', permissions=[PERMISSION_OCR_DOCUMENT_DELETE]) diff --git a/apps/ocr/urls.py b/apps/ocr/urls.py index b652f800c7..91b3e2d85f 100644 --- a/apps/ocr/urls.py +++ b/apps/ocr/urls.py @@ -8,16 +8,10 @@ urlpatterns = patterns('ocr.views', url(r'^document/(?P\d+)/submit/$', 'submit_document', (), 'submit_document'), url(r'^document/multiple/submit/$', 'submit_document_multiple', (), 'submit_document_multiple'), - url(r'^queue/document/(?P\d+)/delete/$', 'queue_document_delete', (), 'queue_document_delete'), - url(r'^queue/document/multiple/delete/$', 'queue_document_multiple_delete', (), 'queue_document_multiple_delete'), - url(r'^queue/document/(?P\d+)/re-queue/$', 're_queue_document', (), 're_queue_document'), - url(r'^queue/document/multiple/re-queue/$', 're_queue_multiple_document', (), 
're_queue_multiple_document'), - + #url(r'^queue/document/(?P\d+)/delete/$', 'queue_document_delete', (), 'queue_document_delete'), + #url(r'^queue/document/multiple/delete/$', 'queue_document_multiple_delete', (), 'queue_document_multiple_delete'), + #url(r'^queue/document/(?P\d+)/re-queue/$', 're_queue_document', (), 're_queue_document'), + #url(r'^queue/document/multiple/re-queue/$', 're_queue_multiple_document', (), 're_queue_multiple_document'), url(r'^document/all/clean_up/$', 'all_document_ocr_cleanup', (), 'all_document_ocr_cleanup'), - - url(r'^queue/(?P\d+)/transformation/list/$', 'setup_queue_transformation_list', (), 'setup_queue_transformation_list'), - url(r'^queue/(?P\w+)/transformation/create/$', 'setup_queue_transformation_create', (), 'setup_queue_transformation_create'), - url(r'^queue/transformation/(?P\w+)/edit/$', 'setup_queue_transformation_edit', (), 'setup_queue_transformation_edit'), - url(r'^queue/transformation/(?P\w+)/delete/$', 'setup_queue_transformation_delete', (), 'setup_queue_transformation_delete'), ) diff --git a/apps/ocr/views.py b/apps/ocr/views.py index 1c019be7ac..ba70b072f6 100644 --- a/apps/ocr/views.py +++ b/apps/ocr/views.py @@ -199,7 +199,8 @@ def submit_document_to_queue(request, document, post_submit_redirect=None): """ try: - ocr_job_queue.push(ocr_job_type, document_version_pk=document.latest_version.pk) + document.submit_for_ocr() + #ocr_job_queue.push(ocr_job_type, document_version_pk=document.latest_version.pk) messages.success(request, _(u'Document: %(document)s was added to the OCR queue sucessfully.') % { 'document': document}) except JobQueuePushError: @@ -301,126 +302,3 @@ def display_link(obj): return u''.join(output) else: return obj - - -# Setup views -def setup_queue_transformation_list(request, document_queue_id): - Permission.objects.check_permissions(request.user, [PERMISSION_OCR_QUEUE_EDIT]) - - document_queue = get_object_or_404(DocumentQueue, pk=document_queue_id) - - context = { - 'object_list': 
QueueTransformation.transformations.get_for_object(document_queue), - 'title': _(u'transformations for: %s') % document_queue, - 'queue': document_queue, - 'object_name': _(u'document queue'), - 'navigation_object_name': 'queue', - 'list_object_variable_name': 'transformation', - 'extra_columns': [ - {'name': _(u'order'), 'attribute': 'order'}, - {'name': _(u'transformation'), 'attribute': encapsulate(lambda x: x.get_transformation_display())}, - {'name': _(u'arguments'), 'attribute': 'arguments'} - ], - 'hide_link': True, - 'hide_object': True, - } - - return render_to_response('generic_list.html', context, - context_instance=RequestContext(request)) - - -def setup_queue_transformation_edit(request, transformation_id): - Permission.objects.check_permissions(request.user, [PERMISSION_OCR_QUEUE_EDIT]) - - transformation = get_object_or_404(QueueTransformation, pk=transformation_id) - redirect_view = reverse('setup_queue_transformation_list', args=[transformation.content_object.pk]) - next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', redirect_view))) - - if request.method == 'POST': - form = QueueTransformationForm(instance=transformation, data=request.POST) - if form.is_valid(): - try: - form.save() - messages.success(request, _(u'Queue transformation edited successfully')) - return HttpResponseRedirect(next) - except Exception, e: - messages.error(request, _(u'Error editing queue transformation; %s') % e) - else: - form = QueueTransformationForm(instance=transformation) - - return render_to_response('generic_form.html', { - 'title': _(u'Edit transformation: %s') % transformation, - 'form': form, - 'queue': transformation.content_object, - 'transformation': transformation, - 'navigation_object_list': [ - {'object': 'queue', 'name': _(u'document queue')}, - {'object': 'transformation', 'name': _(u'transformation')} - ], - 'next': next, - }, - context_instance=RequestContext(request)) - - -def 
setup_queue_transformation_delete(request, transformation_id): - Permission.objects.check_permissions(request.user, [PERMISSION_OCR_QUEUE_EDIT]) - - transformation = get_object_or_404(QueueTransformation, pk=transformation_id) - redirect_view = reverse('setup_queue_transformation_list', args=[transformation.content_object.pk]) - previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', redirect_view))) - - if request.method == 'POST': - try: - transformation.delete() - messages.success(request, _(u'Queue transformation deleted successfully.')) - except Exception, e: - messages.error(request, _(u'Error deleting queue transformation; %(error)s') % { - 'error': e} - ) - return HttpResponseRedirect(redirect_view) - - return render_to_response('generic_confirm.html', { - 'delete_view': True, - 'transformation': transformation, - 'queue': transformation.content_object, - 'navigation_object_list': [ - {'object': 'queue', 'name': _(u'document queue')}, - {'object': 'transformation', 'name': _(u'transformation')} - ], - 'title': _(u'Are you sure you wish to delete queue transformation "%(transformation)s"') % { - 'transformation': transformation.get_transformation_display(), - }, - 'previous': previous, - 'form_icon': u'shape_square_delete.png', - }, - context_instance=RequestContext(request)) - - -def setup_queue_transformation_create(request, document_queue_id): - Permission.objects.check_permissions(request.user, [PERMISSION_OCR_QUEUE_EDIT]) - - document_queue = get_object_or_404(DocumentQueue, pk=document_queue_id) - - redirect_view = reverse('setup_queue_transformation_list', args=[document_queue.pk]) - - if request.method == 'POST': - form = QueueTransformationForm_create(request.POST) - if form.is_valid(): - try: - queue_tranformation = form.save(commit=False) - queue_tranformation.content_object = document_queue - queue_tranformation.save() - messages.success(request, _(u'Queue transformation created successfully')) - 
return HttpResponseRedirect(redirect_view) - except Exception, e: - messages.error(request, _(u'Error creating queue transformation; %s') % e) - else: - form = QueueTransformationForm_create() - - return render_to_response('generic_form.html', { - 'form': form, - 'queue': document_queue, - 'object_name': _(u'document queue'), - 'navigation_object_name': 'queue', - 'title': _(u'Create new transformation for queue: %s') % document_queue, - }, context_instance=RequestContext(request)) From 2f6a1d4e767a63cc422cd055a7e605797fd99423 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 12:56:27 -0400 Subject: [PATCH 20/40] Add job processing app migration --- ...2_auto__del_node__chg_field_worker_node.py | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 apps/job_processor/migrations/0002_auto__del_node__chg_field_worker_node.py diff --git a/apps/job_processor/migrations/0002_auto__del_node__chg_field_worker_node.py b/apps/job_processor/migrations/0002_auto__del_node__chg_field_worker_node.py new file mode 100644 index 0000000000..6edc8dcbc3 --- /dev/null +++ b/apps/job_processor/migrations/0002_auto__del_node__chg_field_worker_node.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +import datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + # Deleting model 'Node' + db.delete_table('job_processor_node') + + + # Changing field 'Worker.node' + db.alter_column('job_processor_worker', 'node_id', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['clustering.Node'])) + + def backwards(self, orm): + # Adding model 'Node' + db.create_table('job_processor_node', ( + ('memory_usage', self.gf('django.db.models.fields.FloatField')(blank=True)), + ('hostname', self.gf('django.db.models.fields.CharField')(max_length=255)), + ('cpuload', self.gf('django.db.models.fields.PositiveIntegerField')(default=0, blank=True)), + 
('heartbeat', self.gf('django.db.models.fields.DateTimeField')(default=datetime.datetime(2012, 7, 30, 0, 0), blank=True)), + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + )) + db.send_create_signal('job_processor', ['Node']) + + + # Changing field 'Worker.node' + db.alter_column('job_processor_worker', 'node_id', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['job_processor.Node'])) + + models = { + 'clustering.node': { + 'Meta': {'object_name': 'Node'}, + 'cpuload': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0', 'blank': 'True'}), + 'heartbeat': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime(2012, 7, 30, 0, 0)', 'blank': 'True'}), + 'hostname': ('django.db.models.fields.CharField', [], {'max_length': '255'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'memory_usage': ('django.db.models.fields.FloatField', [], {'blank': 'True'}) + }, + 'job_processor.jobqueue': { + 'Meta': {'object_name': 'JobQueue'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32'}), + 'unique_jobs': ('django.db.models.fields.BooleanField', [], {'default': 'True'}) + }, + 'job_processor.jobqueueitem': { + 'Meta': {'ordering': "('creation_datetime',)", 'object_name': 'JobQueueItem'}, + 'creation_datetime': ('django.db.models.fields.DateTimeField', [], {}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'job_queue': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['job_processor.JobQueue']"}), + 'job_type': ('django.db.models.fields.CharField', [], {'max_length': '32'}), + 'kwargs': ('django.db.models.fields.TextField', [], {}), + 'result': ('django.db.models.fields.TextField', [], {'blank': 'True'}), + 'state': ('django.db.models.fields.CharField', [], {'default': "'p'", 'max_length': '4'}), + 'unique_id': 
('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64', 'blank': 'True'}) + }, + 'job_processor.worker': { + 'Meta': {'ordering': "('creation_datetime',)", 'object_name': 'Worker'}, + 'creation_datetime': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime(2012, 7, 30, 0, 0)'}), + 'heartbeat': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime(2012, 7, 30, 0, 0)', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '255'}), + 'node': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['clustering.Node']"}), + 'state': ('django.db.models.fields.CharField', [], {'default': "'r'", 'max_length': '4'}) + } + } + + complete_apps = ['job_processor'] \ No newline at end of file From 862bb43d93689c779a1e1637fe8c63ba4b825327 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 12:56:39 -0400 Subject: [PATCH 21/40] Upgrade notes update --- docs/releases/0.13.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/releases/0.13.rst b/docs/releases/0.13.rst index 79f73a2b69..f3313869b1 100644 --- a/docs/releases/0.13.rst +++ b/docs/releases/0.13.rst @@ -79,6 +79,7 @@ Afterwards migrate existing database schema with:: $ ./manage.py migrate linking 0001 --fake $ ./manage.py migrate lock_manager 0001 --fake $ ./manage.py migrate job_processor + $ ./manage.py migrate clustering Issue the following command to index existing documents in the new full text search database:: From c14634dafa64d78ab2ac71f1057591411df6e06b Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 12:56:56 -0400 Subject: [PATCH 22/40] Update clustering app static files --- apps/clustering/static/images/icons/server.png | Bin 0 -> 997 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 apps/clustering/static/images/icons/server.png diff --git 
a/apps/clustering/static/images/icons/server.png b/apps/clustering/static/images/icons/server.png new file mode 100644 index 0000000000000000000000000000000000000000..5b1fb2d7e9ecac5e4b75a3abb191edc38bd9b096 GIT binary patch literal 997 zcmVkTdvDo85EVj( zhKl4zV5Ue`_N6IFBtrF2|3MG+SP$ic(L)G9KJ*_Hp@NhjAz_N4Q3UytSW-fPz4T_< zb$4f`b7uGIRlK|7-QCMDcX!Wk<~Qg3_M91}l!9y*V=VnQ(e<>qpK+@_$UYONqb<@wc!ZHH_R%g%izJh`S9p#6MabN{O6bQjMgSoE@u(WvtsabdDC@_;6QzozOkV-39dJns>RB?QJ^~YI_izyXLbUla1{ZqKv zFz^7z{>Y(y-d7L@0sTEQ%mWtJI@nk)*K33F>z*#bXeVWCUxCyWuA*>{KNVZ6`5cF^ zc%Hs-nE*Fo>DUW9rE#$oBUcDNAE-^C#_5>_52s2rr#okUL1T+ohR28IJOXhPBZ_5u zD3(sQ`W>zqY`EL-O?$v`mSJpU9%u)_omIr`jP3!ZLm+f^S=*3ij47FoB%JlfwjUry zo^v%ma06@P0oTjxt*2{ol@7Et)`IQ#tF{dnPSs$)3N6?>*ta&;U?~(zObd{H zgJbWZ?d)-QH8u@x=d$$pp6tk8<3chQH%UYex{~_KO${(OJ`L^7CtxT`Pw&Yt6Bj)M z(%X;>kAHxqWx>cqa8Gt*AIqLFeMH-lOag4 Date: Mon, 30 Jul 2012 13:06:42 -0400 Subject: [PATCH 23/40] Fix inverted logic --- apps/ocr/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/ocr/__init__.py b/apps/ocr/__init__.py index bfcb1187c0..4b04ad41f7 100644 --- a/apps/ocr/__init__.py +++ b/apps/ocr/__init__.py @@ -84,5 +84,5 @@ class_permissions(Document, [ create_ocr_job_queue() ocr_job_type = JobType('ocr', _(u'OCR'), do_document_ocr) -Document.add_to_class('submit_for_ocr', lambda document: ocr_job_queue.push(ocr_job_type, document_version_pk=document.pk)) -DocumentVersion.add_to_class('submit_for_ocr', lambda document_version: ocr_job_queue.push(ocr_job_type, document_version_pk=document.latest_version.pk)) +Document.add_to_class('submit_for_ocr', lambda document: ocr_job_queue.push(ocr_job_type, document_version_pk=document.latest_version.pk)) +DocumentVersion.add_to_class('submit_for_ocr', lambda document_version: ocr_job_queue.push(ocr_job_type, document_version_pk=document_version.pk)) From cfb1278c7e39e746210e2f95620735832a83b7a7 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 13:07:01 -0400 Subject: [PATCH 
24/40] Use floating point literals --- apps/job_processor/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/job_processor/tasks.py b/apps/job_processor/tasks.py index 226d841a3a..32280c80a9 100644 --- a/apps/job_processor/tasks.py +++ b/apps/job_processor/tasks.py @@ -9,8 +9,8 @@ from .models import JobQueue from .exceptions import JobQueueNoPendingJobs LOCK_EXPIRE = 10 -MAX_CPU_LOAD = 90 -MAX_MEMORY_USAGE = 90 +MAX_CPU_LOAD = 90.0 +MAX_MEMORY_USAGE = 90.0 logger = logging.getLogger(__name__) From c2da6512e4781581d5077580e5e0d83055ea09c9 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 13:11:15 -0400 Subject: [PATCH 25/40] Record and display worker name correctly --- apps/job_processor/models.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/job_processor/models.py b/apps/job_processor/models.py index 279c22c87f..6fb7b52962 100644 --- a/apps/job_processor/models.py +++ b/apps/job_processor/models.py @@ -35,7 +35,7 @@ class Job(object): # Run sync or launch async subprocess # OR launch 2 processes: monitor & actual process node = Node.objects.myself() - worker = Worker.objects.create(node=node, name=u'%s-%d' % (node.hostname, os.getpid())) + worker = Worker.objects.create(node=node, name=os.getpid()) try: close_connection() transaction.commit_on_success(function)(**loads(job_queue_item.kwargs)) @@ -181,6 +181,9 @@ class Worker(models.Model): default=WORKER_STATE_RUNNING, verbose_name=_(u'state')) + def __unicode__(self): + return u'%s-%s' % (self.node.hostname, self.name) + #def disable(self): # if self.state == WORKER_STATE_DISABLED: # raise WorkerAlreadyDisabled From 6d8aebb4119a28fa633f0441f873e07f2eda459c Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 13:13:59 -0400 Subject: [PATCH 26/40] Change name --- apps/job_processor/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/job_processor/__init__.py 
b/apps/job_processor/__init__.py index 8bbd215612..eaa6339041 100644 --- a/apps/job_processor/__init__.py +++ b/apps/job_processor/__init__.py @@ -23,7 +23,7 @@ Node.add_to_class('workers', lambda node: node.worker_set) register_model_list_columns(Node, [ { - 'name': _(u'total workers'), + 'name': _(u'active workers'), 'attribute': encapsulate(lambda x: x.workers().all().count()) }, ]) From d339a250fe7c5bbd57a4d17849c9938515999f34 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 13:16:19 -0400 Subject: [PATCH 27/40] Don't hide worker objects --- apps/job_processor/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/job_processor/views.py b/apps/job_processor/views.py index 2936620c60..fa8a2112fe 100644 --- a/apps/job_processor/views.py +++ b/apps/job_processor/views.py @@ -28,7 +28,7 @@ def node_workers(request, node_pk): 'object_list': node.workers().all(), 'title': _(u'workers for node: %s') % node, 'object': node, - 'hide_object': True, + #'hide_object': True, } return render_to_response('generic_list.html', context, From e0fbac66d113f1a27c0305bb50d3d6008e91d301 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 16:23:38 -0400 Subject: [PATCH 28/40] Connect the worker with the job queue item that spawned it --- ...3_auto__add_field_worker_job_queue_item.py | 60 +++++++++++++++++++ apps/job_processor/models.py | 3 +- apps/job_processor/views.py | 34 ++++++++++- 3 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 apps/job_processor/migrations/0003_auto__add_field_worker_job_queue_item.py diff --git a/apps/job_processor/migrations/0003_auto__add_field_worker_job_queue_item.py b/apps/job_processor/migrations/0003_auto__add_field_worker_job_queue_item.py new file mode 100644 index 0000000000..31469b6b5b --- /dev/null +++ b/apps/job_processor/migrations/0003_auto__add_field_worker_job_queue_item.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +import datetime +from south.db import db 
+from south.v2 import SchemaMigration +from django.db import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + # Adding field 'Worker.job_queue_item' + db.add_column('job_processor_worker', 'job_queue_item', + self.gf('django.db.models.fields.related.ForeignKey')(default=1, to=orm['job_processor.JobQueueItem']), + keep_default=False) + + + def backwards(self, orm): + # Deleting field 'Worker.job_queue_item' + db.delete_column('job_processor_worker', 'job_queue_item_id') + + + models = { + 'clustering.node': { + 'Meta': {'object_name': 'Node'}, + 'cpuload': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0', 'blank': 'True'}), + 'heartbeat': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime(2012, 7, 30, 0, 0)', 'blank': 'True'}), + 'hostname': ('django.db.models.fields.CharField', [], {'max_length': '255'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'memory_usage': ('django.db.models.fields.FloatField', [], {'blank': 'True'}) + }, + 'job_processor.jobqueue': { + 'Meta': {'object_name': 'JobQueue'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32'}), + 'unique_jobs': ('django.db.models.fields.BooleanField', [], {'default': 'True'}) + }, + 'job_processor.jobqueueitem': { + 'Meta': {'ordering': "('creation_datetime',)", 'object_name': 'JobQueueItem'}, + 'creation_datetime': ('django.db.models.fields.DateTimeField', [], {}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'job_queue': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['job_processor.JobQueue']"}), + 'job_type': ('django.db.models.fields.CharField', [], {'max_length': '32'}), + 'kwargs': ('django.db.models.fields.TextField', [], {}), + 'result': ('django.db.models.fields.TextField', [], {'blank': 'True'}), + 'state': 
('django.db.models.fields.CharField', [], {'default': "'p'", 'max_length': '4'}), + 'unique_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64', 'blank': 'True'}) + }, + 'job_processor.worker': { + 'Meta': {'ordering': "('creation_datetime',)", 'object_name': 'Worker'}, + 'creation_datetime': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime(2012, 7, 30, 0, 0)'}), + 'heartbeat': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime(2012, 7, 30, 0, 0)', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'job_queue_item': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['job_processor.JobQueueItem']"}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '255'}), + 'node': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['clustering.Node']"}), + 'state': ('django.db.models.fields.CharField', [], {'default': "'r'", 'max_length': '4'}) + } + } + + complete_apps = ['job_processor'] \ No newline at end of file diff --git a/apps/job_processor/models.py b/apps/job_processor/models.py index 6fb7b52962..72a9142c51 100644 --- a/apps/job_processor/models.py +++ b/apps/job_processor/models.py @@ -35,7 +35,7 @@ class Job(object): # Run sync or launch async subprocess # OR launch 2 processes: monitor & actual process node = Node.objects.myself() - worker = Worker.objects.create(node=node, name=os.getpid()) + worker = Worker.objects.create(node=node, name=os.getpid(), job_queue_item=job_queue_item) try: close_connection() transaction.commit_on_success(function)(**loads(job_queue_item.kwargs)) @@ -180,6 +180,7 @@ class Worker(models.Model): choices=WORKER_STATE_CHOICES, default=WORKER_STATE_RUNNING, verbose_name=_(u'state')) + job_queue_item = models.ForeignKey(JobQueueItem, verbose_name=_(u'job queue item')) def __unicode__(self): return u'%s-%s' % (self.node.hostname, self.name) diff --git 
a/apps/job_processor/views.py b/apps/job_processor/views.py index fa8a2112fe..b1b5461c0b 100644 --- a/apps/job_processor/views.py +++ b/apps/job_processor/views.py @@ -28,8 +28,40 @@ def node_workers(request, node_pk): 'object_list': node.workers().all(), 'title': _(u'workers for node: %s') % node, 'object': node, - #'hide_object': True, + 'hide_links': True, + 'extra_columns': [ + { + 'name': _(u'created'), + 'attribute': 'creation_datetime', + }, + { + 'name': _(u'heartbeat'), + 'attribute': 'heartbeat', + }, + { + 'name': _(u'state'), + 'attribute': 'get_state_display', + }, + { + 'name': _(u'job queue item'), + 'attribute': 'job_queue_item', + }, + { + 'name': _(u'job type'), + 'attribute': 'job_queue_item.job_type', + }, + ], } return render_to_response('generic_list.html', context, context_instance=RequestContext(request)) + + + node = models.ForeignKey(Node, verbose_name=_(u'node')) + name = models.CharField(max_length=255, verbose_name=_(u'name')) + creation_datetime = models.DateTimeField(verbose_name=_(u'creation datetime'), default=lambda: datetime.datetime.now(), editable=False) + heartbeat = models.DateTimeField(blank=True, default=datetime.datetime.now(), verbose_name=_(u'heartbeat check')) + stat#e = models.CharField(max_length=4, + #choices=WORKER_STATE_CHOICES, + #default=WORKER_STATE_RUNNING, + #verbose_name=_(u'state')) From 60972cd0baa9bea394f8c00520f5fccb6d712c96 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 16:52:35 -0400 Subject: [PATCH 29/40] Add missing template from fabfile installer (Thanks to Pierre Host) --- fabfile/templates/settings_local.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 fabfile/templates/settings_local.py diff --git a/fabfile/templates/settings_local.py b/fabfile/templates/settings_local.py new file mode 100644 index 0000000000..633f25e6c8 --- /dev/null +++ b/fabfile/templates/settings_local.py @@ -0,0 +1,10 @@ +DATABASES = { + 'default': { + 'ENGINE': 
'django.db.backends.%(django_database_driver)s', + 'NAME': '%(database_name)s', + 'USER': '%(database_username)s', + 'PASSWORD': '%(database_password)s', + 'HOST': '%(database_host)s', + 'PORT': '', + } +} From 6930f76f2fcd601a22ccfd898cac8e0cab5a2d23 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 16:53:08 -0400 Subject: [PATCH 30/40] Only ignore the top settings_local.py file --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 6a625abb94..869b2d90b2 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ site_media/photologue/photos/* site_media/photologue/photos/cache/* *.sqlite -settings_local.py +/settings_local.py site_media/documents/* celerybeat-schedule document_storage/ From 014b2f1d1ffe7b5aef3c538896d9f92017802a5c Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 17:13:22 -0400 Subject: [PATCH 31/40] Do some more updates to the .gitignore file --- .gitignore | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 869b2d90b2..c517550e4d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,15 +1,15 @@ *.orig *.pyc *.pyo -site_media/photologue/photos/* -site_media/photologue/photos/cache/* -*.sqlite +/*.sqlite /settings_local.py -site_media/documents/* -celerybeat-schedule -document_storage/ -misc/mayan.geany -image_cache/ +/celerybeat-schedule +/document_storage/ +/misc/mayan.geany +/image_cache/ build/ _build/ -gpg_home/ +/gpg_home/ +/static/ +/whoosh_index/ +/fabfile_install From 87c958638df68fd6a45b59314bb10075d12275bd Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 23:51:05 -0400 Subject: [PATCH 32/40] Implement clustering housekeeping, deleting 'dead' nodes --- apps/clustering/__init__.py | 9 +++--- apps/clustering/models.py | 62 +++++++++++++++++++++++++++++++------ apps/clustering/tasks.py | 16 +++++++--- 3 files changed, 69 insertions(+), 18 deletions(-) diff --git 
a/apps/clustering/__init__.py b/apps/clustering/__init__.py index 4149c12347..f5b453f6d2 100644 --- a/apps/clustering/__init__.py +++ b/apps/clustering/__init__.py @@ -6,13 +6,12 @@ from scheduler.api import register_interval_job from navigation.api import bind_links from project_tools.api import register_tool -from .tasks import refresh_node +from .tasks import node_heartbeat, house_keeping from .links import tool_link, node_list -from .models import Node +from .models import Node, ClusteringConfig -NODE_REFRESH_INTERVAL = 1 - -register_interval_job('refresh_node', _(u'Update a node\'s properties.'), refresh_node, seconds=NODE_REFRESH_INTERVAL) +register_interval_job('node_heartbeat', _(u'Update a node\'s properties.'), node_heartbeat, seconds=ClusteringConfig.get().node_heartbeat_interval) +register_interval_job('house_keeping', _(u'Check for unresponsive nodes in the cluster list.'), house_keeping, seconds=1) register_tool(tool_link) bind_links([Node, 'node_list'], [node_list], menu_name='secondary_menu') diff --git a/apps/clustering/models.py b/apps/clustering/models.py index 67ded6cf04..764691aadc 100644 --- a/apps/clustering/models.py +++ b/apps/clustering/models.py @@ -11,30 +11,47 @@ from django.db import close_connection from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext +from common.models import Singleton + +DEFAULT_NODE_TTL = 5 +DEFAULT_NODE_HEARTBEAT_INTERVAL = 1 + class NodeManager(models.Manager): def myself(self): - node, created = self.model.objects.get_or_create(hostname=platform.node(), defaults={'memory_usage': 100}) + node, created = self.model.objects.get_or_create(hostname=platform.node()) node.refresh() + if created: + # Store the refresh data because is a new instance + node.save() return node class Node(models.Model): hostname = models.CharField(max_length=255, verbose_name=_(u'hostname')) - cpuload = models.PositiveIntegerField(blank=True, default=0, verbose_name=_(u'cpu load')) + cpuload 
= models.FloatField(blank=True, default=0.0, verbose_name=_(u'cpu load')) heartbeat = models.DateTimeField(blank=True, default=datetime.datetime.now(), verbose_name=_(u'last heartbeat check')) - memory_usage = models.FloatField(blank=True, verbose_name=_(u'memory usage')) - - objects = NodeManager() + memory_usage = models.FloatField(blank=True, default=0.0, verbose_name=_(u'memory usage')) + objects = NodeManager() + + @classmethod + def platform_info(cls): + return { + 'cpuload': psutil.cpu_percent(), + 'memory_usage': psutil.phymem_usage().percent + } + def __unicode__(self): return self.hostname def refresh(self): - self.cpuload = psutil.cpu_percent() - self.memory_usage = psutil.phymem_usage().percent - self.save() - + if self.hostname == platform.node(): + # Make we can only update ourselves + info = Node.platform_info() + self.cpuload = info['cpuload'] + self.memory_usage = info['memory_usage'] + def save(self, *args, **kwargs): self.heartbeat = datetime.datetime.now() return super(Node, self).save(*args, **kwargs) @@ -42,3 +59,30 @@ class Node(models.Model): class Meta: verbose_name = _(u'node') verbose_name_plural = _(u'nodes') + + +class ClusteringConfigManager(models.Manager): + def dead_nodes(self): + return Node.objects.filter(heartbeat__lt=datetime.datetime.now() - datetime.timedelta(seconds=self.model.get().node_time_to_live)) + + def delete_dead_nodes(self): + self.dead_nodes().delete() + + def zombiest_node(self): + try: + return self.dead_nodes().order_by('-heartbeat')[0] + except IndexError: + return None + + +class ClusteringConfig(Singleton): + node_time_to_live = models.PositiveIntegerField(verbose_name=(u'time to live (in seconds)'), default=DEFAULT_NODE_TTL) # After this time a worker is considered dead + node_heartbeat_interval = models.PositiveIntegerField(verbose_name=(u'heartbeat interval'), default=DEFAULT_NODE_HEARTBEAT_INTERVAL) + + objects = ClusteringConfigManager() + + def __unicode__(self): + return ugettext('clustering config') + 
+ class Meta: + verbose_name = verbose_name_plural = _(u'clustering config') diff --git a/apps/clustering/tasks.py b/apps/clustering/tasks.py index aa9f01b53e..c5938047d9 100644 --- a/apps/clustering/tasks.py +++ b/apps/clustering/tasks.py @@ -4,14 +4,22 @@ import logging from lock_manager.decorators import simple_locking -from .models import Node +from .models import Node, ClusteringConfig LOCK_EXPIRE = 10 logger = logging.getLogger(__name__) -@simple_locking('refresh_node', 10) -def refresh_node(): +@simple_locking('node_heartbeat', 10) +def node_heartbeat(): logger.debug('starting') - node = Node.objects.myself() # Automatically calls the refresh() method too + node = Node.objects.myself() + node.save() + + +@simple_locking('house_keeping', 10) +def house_keeping(): + logger.debug('starting') + ClusteringConfig.objects.delete_dead_nodes() + From c3ac4a623a782ac27c91a9f50c9bcf532364b545 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Mon, 30 Jul 2012 23:51:29 -0400 Subject: [PATCH 33/40] Add clustering migrations --- .../0002_auto__add_clusteringconfig.py | 44 +++++++++++++++++++ .../0003_auto__chg_field_node_cpuload.py | 38 ++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 apps/clustering/migrations/0002_auto__add_clusteringconfig.py create mode 100644 apps/clustering/migrations/0003_auto__chg_field_node_cpuload.py diff --git a/apps/clustering/migrations/0002_auto__add_clusteringconfig.py b/apps/clustering/migrations/0002_auto__add_clusteringconfig.py new file mode 100644 index 0000000000..26acb2d393 --- /dev/null +++ b/apps/clustering/migrations/0002_auto__add_clusteringconfig.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +import datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + # Adding model 'ClusteringConfig' + db.create_table('clustering_clusteringconfig', ( + ('id', 
self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('lock_id', self.gf('django.db.models.fields.CharField')(default=1, unique=True, max_length=1)), + ('node_time_to_live', self.gf('django.db.models.fields.PositiveIntegerField')()), + ('node_heartbeat_interval', self.gf('django.db.models.fields.PositiveIntegerField')()), + )) + db.send_create_signal('clustering', ['ClusteringConfig']) + + + def backwards(self, orm): + # Deleting model 'ClusteringConfig' + db.delete_table('clustering_clusteringconfig') + + + models = { + 'clustering.clusteringconfig': { + 'Meta': {'object_name': 'ClusteringConfig'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'lock_id': ('django.db.models.fields.CharField', [], {'default': '1', 'unique': 'True', 'max_length': '1'}), + 'node_heartbeat_interval': ('django.db.models.fields.PositiveIntegerField', [], {}), + 'node_time_to_live': ('django.db.models.fields.PositiveIntegerField', [], {}) + }, + 'clustering.node': { + 'Meta': {'object_name': 'Node'}, + 'cpuload': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0', 'blank': 'True'}), + 'heartbeat': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime(2012, 7, 30, 0, 0)', 'blank': 'True'}), + 'hostname': ('django.db.models.fields.CharField', [], {'max_length': '255'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'memory_usage': ('django.db.models.fields.FloatField', [], {'blank': 'True'}) + } + } + + complete_apps = ['clustering'] \ No newline at end of file diff --git a/apps/clustering/migrations/0003_auto__chg_field_node_cpuload.py b/apps/clustering/migrations/0003_auto__chg_field_node_cpuload.py new file mode 100644 index 0000000000..7cfbc5b1c3 --- /dev/null +++ b/apps/clustering/migrations/0003_auto__chg_field_node_cpuload.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +import datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db 
import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + + # Changing field 'Node.cpuload' + db.alter_column('clustering_node', 'cpuload', self.gf('django.db.models.fields.FloatField')()) + + def backwards(self, orm): + + # Changing field 'Node.cpuload' + db.alter_column('clustering_node', 'cpuload', self.gf('django.db.models.fields.PositiveIntegerField')()) + + models = { + 'clustering.clusteringconfig': { + 'Meta': {'object_name': 'ClusteringConfig'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'lock_id': ('django.db.models.fields.CharField', [], {'default': '1', 'unique': 'True', 'max_length': '1'}), + 'node_heartbeat_interval': ('django.db.models.fields.PositiveIntegerField', [], {'default': '1'}), + 'node_time_to_live': ('django.db.models.fields.PositiveIntegerField', [], {'default': '5'}) + }, + 'clustering.node': { + 'Meta': {'object_name': 'Node'}, + 'cpuload': ('django.db.models.fields.FloatField', [], {'default': '0.0', 'blank': 'True'}), + 'heartbeat': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime(2012, 7, 30, 0, 0)', 'blank': 'True'}), + 'hostname': ('django.db.models.fields.CharField', [], {'max_length': '255'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'memory_usage': ('django.db.models.fields.FloatField', [], {'default': '0.0', 'blank': 'True'}) + } + } + + complete_apps = ['clustering'] \ No newline at end of file From ffaf01c637cad68c6fb5c4be8f88277bb1b8171d Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Tue, 31 Jul 2012 01:19:08 -0400 Subject: [PATCH 34/40] Add job queue list, pending, active and error queue items view, links and permissions --- apps/job_processor/__init__.py | 13 ++- apps/job_processor/links.py | 15 ++- ...field_worker_name__add_field_worker_pid.py | 66 +++++++++++ apps/job_processor/models.py | 30 ++++- apps/job_processor/permissions.py | 2 +- .../static/images/icons/hourglass.png | Bin 0 -> 2012 
bytes apps/job_processor/tasks.py | 5 +- apps/job_processor/urls.py | 13 +-- apps/job_processor/views.py | 108 ++++++++++++++++-- 9 files changed, 215 insertions(+), 37 deletions(-) create mode 100644 apps/job_processor/migrations/0004_auto__del_field_worker_name__add_field_worker_pid.py create mode 100755 apps/job_processor/static/images/icons/hourglass.png diff --git a/apps/job_processor/__init__.py b/apps/job_processor/__init__.py index eaa6339041..b66ada46cf 100644 --- a/apps/job_processor/__init__.py +++ b/apps/job_processor/__init__.py @@ -7,16 +7,21 @@ from navigation.api import bind_links, register_model_list_columns from project_tools.api import register_tool from common.utils import encapsulate -from .tasks import job_queue_poll -from .links import node_workers from clustering.models import Node +from .models import JobQueue +from .tasks import job_queue_poll +from .links import (node_workers, job_queues, tool_link, + job_queue_items_pending, job_queue_items_error, job_queue_items_active) + JOB_QUEUE_POLL_INTERVAL = 1 register_interval_job('job_queue_poll', _(u'Poll a job queue for pending jobs.'), job_queue_poll, seconds=JOB_QUEUE_POLL_INTERVAL) -#register_tool(tool_link) -#bind_links([Node, 'node_list'], [node_list], menu_name='secondary_menu') +register_tool(tool_link) +bind_links([JobQueue, 'job_queues'], [job_queues], menu_name='secondary_menu') +bind_links([JobQueue], [job_queue_items_pending, job_queue_items_active, job_queue_items_error]) + bind_links([Node], [node_workers]) Node.add_to_class('workers', lambda node: node.worker_set) diff --git a/apps/job_processor/links.py b/apps/job_processor/links.py index 5d6dbb44cc..222c95d04c 100644 --- a/apps/job_processor/links.py +++ b/apps/job_processor/links.py @@ -4,12 +4,15 @@ from django.utils.translation import ugettext_lazy as _ from navigation.api import Link -from clustering.permissions import (PERMISSION_NODES_VIEW) +from clustering.permissions import PERMISSION_NODES_VIEW + +from .permissions 
import PERMISSION_JOB_QUEUE_VIEW node_workers = Link(text=_(u'workers'), view='node_workers', args='object.pk', sprite='lorry_go', permissions=[PERMISSION_NODES_VIEW]) -#index_setup_create = Link(text=_(u'create index'), view='index_setup_create', sprite='tab_add', permissions=[PERMISSION_DOCUMENT_INDEXING_CREATE]) -#index_setup_edit = Link(text=_(u'edit'), view='index_setup_edit', args='index.pk', sprite='tab_edit', permissions=[PERMISSION_DOCUMENT_INDEXING_EDIT]) -#index_setup_delete = Link(text=_(u'delete'), view='index_setup_delete', args='index.pk', sprite='tab_delete', permissions=[PERMISSION_DOCUMENT_INDEXING_DELETE]) -#index_setup_view = Link(text=_(u'tree template'), view='index_setup_view', args='index.pk', sprite='textfield', permissions=[PERMISSION_DOCUMENT_INDEXING_SETUP]) -#index_setup_document_types = Link(text=_(u'document types'), view='index_setup_document_types', args='index.pk', sprite='layout', permissions=[PERMISSION_DOCUMENT_INDEXING_EDIT]) # children_view_regex=[r'^index_setup', r'^template_node']) + +tool_link = Link(text=_(u'job queues'), view='job_queues', icon='hourglass.png', permissions=[PERMISSION_JOB_QUEUE_VIEW]) +job_queues = Link(text=_(u'job queues list'), view='job_queues', sprite='hourglass', permissions=[PERMISSION_JOB_QUEUE_VIEW]) +job_queue_items_pending = Link(text=_(u'pending jobs'), view='job_queue_items_pending', args='object.pk', sprite='text_list_bullets', permissions=[PERMISSION_JOB_QUEUE_VIEW]) +job_queue_items_error = Link(text=_(u'error jobs'), view='job_queue_items_error', args='object.pk', sprite='text_list_bullets', permissions=[PERMISSION_JOB_QUEUE_VIEW]) +job_queue_items_active = Link(text=_(u'active jobs'), view='job_queue_items_active', args='object.pk', sprite='text_list_bullets', permissions=[PERMISSION_JOB_QUEUE_VIEW]) diff --git a/apps/job_processor/migrations/0004_auto__del_field_worker_name__add_field_worker_pid.py b/apps/job_processor/migrations/0004_auto__del_field_worker_name__add_field_worker_pid.py 
new file mode 100644 index 0000000000..77c4260b45 --- /dev/null +++ b/apps/job_processor/migrations/0004_auto__del_field_worker_name__add_field_worker_pid.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +import datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + # Deleting field 'Worker.name' + db.delete_column('job_processor_worker', 'name') + + # Adding field 'Worker.pid' + db.add_column('job_processor_worker', 'pid', + self.gf('django.db.models.fields.PositiveIntegerField')(default=1, max_length=255), + keep_default=False) + + + def backwards(self, orm): + + # User chose to not deal with backwards NULL issues for 'Worker.name' + raise RuntimeError("Cannot reverse this migration. 'Worker.name' and its values cannot be restored.") + # Deleting field 'Worker.pid' + db.delete_column('job_processor_worker', 'pid') + + + models = { + 'clustering.node': { + 'Meta': {'object_name': 'Node'}, + 'cpuload': ('django.db.models.fields.FloatField', [], {'default': '0.0', 'blank': 'True'}), + 'heartbeat': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime(2012, 7, 30, 0, 0)', 'blank': 'True'}), + 'hostname': ('django.db.models.fields.CharField', [], {'max_length': '255'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'memory_usage': ('django.db.models.fields.FloatField', [], {'default': '0.0', 'blank': 'True'}) + }, + 'job_processor.jobqueue': { + 'Meta': {'object_name': 'JobQueue'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32'}), + 'unique_jobs': ('django.db.models.fields.BooleanField', [], {'default': 'True'}) + }, + 'job_processor.jobqueueitem': { + 'Meta': {'ordering': "('creation_datetime',)", 'object_name': 'JobQueueItem'}, + 'creation_datetime': 
('django.db.models.fields.DateTimeField', [], {}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'job_queue': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['job_processor.JobQueue']"}), + 'job_type': ('django.db.models.fields.CharField', [], {'max_length': '32'}), + 'kwargs': ('django.db.models.fields.TextField', [], {}), + 'result': ('django.db.models.fields.TextField', [], {'blank': 'True'}), + 'state': ('django.db.models.fields.CharField', [], {'default': "'p'", 'max_length': '4'}), + 'unique_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64', 'blank': 'True'}) + }, + 'job_processor.worker': { + 'Meta': {'ordering': "('creation_datetime',)", 'object_name': 'Worker'}, + 'creation_datetime': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime(2012, 7, 30, 0, 0)'}), + 'heartbeat': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime(2012, 7, 30, 0, 0)', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'job_queue_item': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['job_processor.JobQueueItem']"}), + 'node': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['clustering.Node']"}), + 'pid': ('django.db.models.fields.PositiveIntegerField', [], {'max_length': '255'}), + 'state': ('django.db.models.fields.CharField', [], {'default': "'r'", 'max_length': '4'}) + } + } + + complete_apps = ['job_processor'] \ No newline at end of file diff --git a/apps/job_processor/models.py b/apps/job_processor/models.py index 72a9142c51..ed10c670bb 100644 --- a/apps/job_processor/models.py +++ b/apps/job_processor/models.py @@ -35,7 +35,7 @@ class Job(object): # Run sync or launch async subprocess # OR launch 2 processes: monitor & actual process node = Node.objects.myself() - worker = Worker.objects.create(node=node, name=os.getpid(), job_queue_item=job_queue_item) + worker = 
Worker.objects.create(node=node, pid=os.getpid(), job_queue_item=job_queue_item) try: close_connection() transaction.commit_on_success(function)(**loads(job_queue_item.kwargs)) @@ -62,6 +62,9 @@ class JobType(object): self.function = function job_types_registry[self.name] = self + def __unicode__(self): + return unicode(self.label) + def run(self, job_queue_item, **kwargs): job_queue_item.state = JOB_STATE_PROCESSING job_queue_item.save() @@ -111,6 +114,14 @@ class JobQueue(models.Model): @property def pending_jobs(self): return self.items.filter(state=JOB_STATE_PENDING) + + @property + def error_jobs(self): + return self.items.filter(state=JOB_STATE_ERROR) + + @property + def active_jobs(self): + return self.items.filter(state=JOB_STATE_PROCESSING) @property def items(self): @@ -161,9 +172,19 @@ class JobQueueItem(models.Model): # TODO: Maybe replace instead of rasining exception w/ replace flag raise JobQueuePushError + def get_job_type(self): + return job_types_registry.get(self.job_type) + def run(self): - job_type_instance = job_types_registry.get(self.job_type) + job_type_instance = self.get_job_type() job_type_instance.run(self) + + @property + def worker(self): + try: + return self.worker_set.get() + except Worker.DoesNotExist: + return None class Meta: ordering = ('creation_datetime',) @@ -173,7 +194,7 @@ class JobQueueItem(models.Model): class Worker(models.Model): node = models.ForeignKey(Node, verbose_name=_(u'node')) - name = models.CharField(max_length=255, verbose_name=_(u'name')) + pid = models.PositiveIntegerField(max_length=255, verbose_name=_(u'name')) creation_datetime = models.DateTimeField(verbose_name=_(u'creation datetime'), default=lambda: datetime.datetime.now(), editable=False) heartbeat = models.DateTimeField(blank=True, default=datetime.datetime.now(), verbose_name=_(u'heartbeat check')) state = models.CharField(max_length=4, @@ -183,7 +204,7 @@ class Worker(models.Model): job_queue_item = models.ForeignKey(JobQueueItem, 
verbose_name=_(u'job queue item')) def __unicode__(self): - return u'%s-%s' % (self.node.hostname, self.name) + return u'%s-%s' % (self.node.hostname, self.pid) #def disable(self): # if self.state == WORKER_STATE_DISABLED: @@ -211,7 +232,6 @@ class Worker(models.Model): class JobProcessingConfig(Singleton): worker_time_to_live = models.PositiveInteger(verbose_name=(u'time to live (in seconds)') # After this time a worker is considered dead worker_heartbeat_interval = models.PositiveInteger(verbose_name=(u'heartbeat interval') - node_heartbeat_interval = models.PositiveInteger(verbose_name=(u'heartbeat interval') def __unicode__(self): return ugettext('Workers configuration') diff --git a/apps/job_processor/permissions.py b/apps/job_processor/permissions.py index 4b5988c48d..74c06b609a 100644 --- a/apps/job_processor/permissions.py +++ b/apps/job_processor/permissions.py @@ -5,4 +5,4 @@ from django.utils.translation import ugettext_lazy as _ from permissions.models import PermissionNamespace, Permission namespace = PermissionNamespace('job_processor', _(u'Job processor')) -#PERMISSION_NODES_VIEW = Permission.objects.register(namespace, 'nodes_view', _(u'View the registeres nodes in a Mayan cluster')) +PERMISSION_JOB_QUEUE_VIEW = Permission.objects.register(namespace, 'job_queue_view', _(u'View the job queues in a Mayan cluster')) diff --git a/apps/job_processor/static/images/icons/hourglass.png b/apps/job_processor/static/images/icons/hourglass.png new file mode 100755 index 0000000000000000000000000000000000000000..cd14446b9fc6d0ae23b02e1a1b9e4a9b458cf39e GIT binary patch literal 2012 zcmV<22P622P)R~7!|&U>Hs+D>-k z*ohN0N~Od|k$6-=NL58qlhy&11}Y&W5+EcIqEZq0K?y~ODlHThL_w6a1&JRO;RmT& zqP9p>K$@yZ2Dy%K?D`eQ>$TVW@V<9uXNGfUW>Y`1&IYb@_Rh?`d(OG%^_?p?j)Ue9 z1mWi6?@#aBrO52Ff`hvxQMiYeIvIpMv@vWrbHp~+u$(K7fCzo>CpbIGC(K z)H@(4LHb-%-#-8Ir)cSCM3v_D^$O?$ovJkb7j7HK;XbI5eu#lSi1HxKz5Y3!78VfP zFIHt9mRY5F8Af3O<>DMI3;rtf%@hzVK`4w~o`}S@1fl9_1f$o$$SmPl2+q3GZy25q 
zuApMJ1k12cS}DV*HIQ2(YZ7lRV~rEn#Xtv50}l*#eDn*)zy0fAV*x@n3u$Q>4!L5T zl~+a7+IK#4)2qD)poU|ZF%!7(;!jRo8p;2YmZpjFtzw`|G)o)n_w^1fR`=}R$`qiB zj3$%bA_>mb0F?n`6v#Bo^i8B$;v{@H^KTTxsL}rP-`-!mH1fnhh1& zvV8rWa97XgSa%$E_W(O0Jaj$A6Io!24C08cYvC}@^v!3RW=b3g#C?7?1r~nWbLJA&zut~X`a-$mH{Z6K$(S-!B6c#Gt$t_3{)bGh%09|lf?GI)3WHuOKJS} zr)SP_o*Q!Y78baf#xoOF=a=TDFDA6;k!(*t#_7a6Lly#!IrKRt2>CZ4$+8=8jGx%C zU`n0Hb{&Nv>k!F0W8>G6nxDNmJ8?DXE3jd(778?o3Qv6F$XTPD$*^1!ACU3#21%-~ z&;ezLY8?=Sne5pU51l1mD?ZP*6@Z_ssgpwo9yANf z*+ymo!;6ffXeaF23WAXYjA{iYMVUpjRV|_rI}A%wF*%b$YH>Mx>d1k^e&AJF3E4~m z%MY5_6AyjlaB}=#ubZhET)ECL1u9BX2%4zFD&}C=AruBqB5CzwVtf|YMu%TNIW)+H z&-y&9RzidpV6=L05a=X|M>@OWPyXuFA8r}g_p$BUBA^h!N|n-$BB_urVP<^fy(do& zeXo$qr2LcT{LI=I>Aqan2Dotsln0n1#{YTa!ucOFs{i$dx56xs8fFE2jql-m{r{M^ zW8RJeO?Q+&b?9jE;p2~=VzP>WKH2lIT^vLcV}}K2MoZec||L_MX~IiB|orNaKxAtRS7hJ!qRvu9&iF$4f6~cLm zBnniTYjPh-fkBwsZb*`+=(X`VvPSadZyo)_*Ven^wiY;a^sC*9t{<~v{pb$^-C_w_ z>;Fb)V;Z7kqg*b*G$j}oC#sCqBs#ONp(k}7a`lE=_V(V@N3C4oD-RH_JIw;=z6T$D zx}kR?r~-q5c|^@A58ed&z*N{Ip+OO7nl_~%5xtuxgIzBpbmJ9h$v55Y@$PQid+5<; zTH#JBFw#UxzTb|C_#2NsYF>NZSO#l_+$G0-TYH{u{3Xh3zS3#L=+l8jP$nU2leM#M9N z11q`*Opn0{QUlvF=ype@Bes;a;a>2^$M$_EEF0e)yZcn>+j~KAb8A_|1r0!Q2cD1>ZG_ uKZ`yIcc_5l*MG}zNDY5>R%5|`1sDLP@LrTpzu7$i0000\d+)/workers/$', 'node_workers', (), 'node_workers'), - #url(r'^create/$', 'folder_create', (), 'folder_create'), - #url(r'^(?P\d+)/edit/$', 'folder_edit', (), 'folder_edit'), - #url(r'^(?P\d+)/delete/$', 'folder_delete', (), 'folder_delete'), - #url(r'^(?P\d+)/$', 'folder_view', (), 'folder_view'), - #url(r'^(?P\d+)/remove/document/multiple/$', 'folder_document_multiple_remove', (), 'folder_document_multiple_remove'), - #url(r'^document/(?P\d+)/folder/add/$', 'folder_add_document', (), 'folder_add_document'), - #url(r'^document/(?P\d+)/folder/list/$', 'document_folder_list', (), 'document_folder_list'), - #url(r'^(?P\d+)/acl/list/$', 'folder_acl_list', (), 'folder_acl_list'), + url(r'^queue/list/$', 'job_queues', (), 'job_queues'), + 
url(r'^queue/(?P\d+)/items/pending/$', 'job_queue_items', {'pending_jobs': True}, 'job_queue_items_pending'), + url(r'^queue/(?P\d+)/items/error/$', 'job_queue_items', {'error_jobs' :True}, 'job_queue_items_error'), + url(r'^queue/(?P\d+)/items/active/$', 'job_queue_items', {'active_jobs' :True}, 'job_queue_items_active'), ) diff --git a/apps/job_processor/views.py b/apps/job_processor/views.py index b1b5461c0b..101697d46a 100644 --- a/apps/job_processor/views.py +++ b/apps/job_processor/views.py @@ -8,13 +8,15 @@ from django.contrib.contenttypes.models import ContentType from django.db.models.loading import get_model from django.http import Http404 from django.core.exceptions import PermissionDenied - from permissions.models import Permission from common.utils import encapsulate from acls.models import AccessEntry from clustering.permissions import PERMISSION_NODES_VIEW from clustering.models import Node +from .models import JobQueue +from .permissions import PERMISSION_JOB_QUEUE_VIEW + def node_workers(request, node_pk): node = get_object_or_404(Node, pk=node_pk) @@ -28,7 +30,7 @@ def node_workers(request, node_pk): 'object_list': node.workers().all(), 'title': _(u'workers for node: %s') % node, 'object': node, - 'hide_links': True, + 'hide_link': True, 'extra_columns': [ { 'name': _(u'created'), @@ -48,7 +50,11 @@ def node_workers(request, node_pk): }, { 'name': _(u'job type'), - 'attribute': 'job_queue_item.job_type', + 'attribute': 'job_queue_item.get_job_type', + }, + { + 'name': _(u'job queue'), + 'attribute': 'job_queue_item.job_queue', }, ], } @@ -57,11 +63,91 @@ def node_workers(request, node_pk): context_instance=RequestContext(request)) - node = models.ForeignKey(Node, verbose_name=_(u'node')) - name = models.CharField(max_length=255, verbose_name=_(u'name')) - creation_datetime = models.DateTimeField(verbose_name=_(u'creation datetime'), default=lambda: datetime.datetime.now(), editable=False) - heartbeat = models.DateTimeField(blank=True, 
default=datetime.datetime.now(), verbose_name=_(u'heartbeat check')) - stat#e = models.CharField(max_length=4, - #choices=WORKER_STATE_CHOICES, - #default=WORKER_STATE_RUNNING, - #verbose_name=_(u'state')) +def job_queues(request): + # TODO: permissiong list filtering + Permission.objects.check_permissions(request.user, [PERMISSION_JOB_QUEUE_VIEW]) + + context = { + 'object_list': JobQueue.objects.all(), + 'title': _(u'job queue'), + 'hide_link': True, + 'extra_columns': [ + { + 'name': _(u'pending jobs'), + 'attribute': 'pending_jobs.count', + }, + { + 'name': _(u'active jobs'), + 'attribute': 'active_jobs.count', + }, + { + 'name': _(u'error jobs'), + 'attribute': 'error_jobs.count', + }, + ], + } + + return render_to_response('generic_list.html', context, + context_instance=RequestContext(request)) + + +def job_queue_items(request, job_queue_pk, pending_jobs=False, error_jobs=False, active_jobs=False): + job_queue = get_object_or_404(JobQueue, pk=job_queue_pk) + + try: + Permission.objects.check_permissions(request.user, [PERMISSION_JOB_QUEUE_VIEW]) + except PermissionDenied: + AccessEntry.objects.check_access(PERMISSION_JOB_QUEUE_VIEW, request.user, job_queue) + + jobs = set() + if pending_jobs: + jobs = job_queue.pending_jobs.all() + title = _(u'pending jobs for queue: %s') % job_queue + + if error_jobs: + jobs = job_queue.error_jobs.all() + title = _(u'error jobs for queue: %s') % job_queue + + if active_jobs: + jobs = job_queue.active_jobs.all() + title = _(u'active jobs for queue: %s') % job_queue + + context = { + 'object_list': jobs, + 'title': title, + 'object': job_queue, + 'hide_link': True, + 'extra_columns': [ + { + 'name': _(u'created'), + 'attribute': 'creation_datetime', + }, + { + 'name': _(u'job type'), + 'attribute': 'get_job_type', + }, + { + 'name': _(u'arguments'), + 'attribute': 'kwargs', + }, + ], + } + + if active_jobs: + context['extra_columns'].append( + { + 'name': _(u'worker'), + 'attribute': encapsulate(lambda x: x.worker or 
_(u'Unknown')), + } + ) + + if error_jobs: + context['extra_columns'].append( + { + 'name': _(u'result'), + 'attribute': 'result', + } + ) + + return render_to_response('generic_list.html', context, + context_instance=RequestContext(request)) From 859109c37879814ad6ef44233097d8d027322de6 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 1 Aug 2012 01:41:37 -0400 Subject: [PATCH 35/40] Refactor scheduler --- apps/scheduler/__init__.py | 51 ++++-------- apps/scheduler/api.py | 153 ++++++++++++++++++++++++++++++---- apps/scheduler/exceptions.py | 12 +++ apps/scheduler/links.py | 6 +- apps/scheduler/literals.py | 1 + apps/scheduler/permissions.py | 3 +- apps/scheduler/runtime.py | 4 - apps/scheduler/urls.py | 3 +- apps/scheduler/views.py | 57 ++++++++++--- 9 files changed, 220 insertions(+), 70 deletions(-) create mode 100644 apps/scheduler/literals.py delete mode 100644 apps/scheduler/runtime.py diff --git a/apps/scheduler/__init__.py b/apps/scheduler/__init__.py index e9e693b024..25fcbf6190 100644 --- a/apps/scheduler/__init__.py +++ b/apps/scheduler/__init__.py @@ -2,46 +2,31 @@ from __future__ import absolute_import import logging import atexit +import sys -from .runtime import scheduler - -from django.db.models.signals import post_syncdb -from django.dispatch import receiver - -from south.signals import pre_migrate - -from signaler.signals import pre_collectstatic from project_tools.api import register_tool +from navigation.api import bind_links + +from .links import scheduler_tool_link, scheduler_list, job_list +from .literals import SHUTDOWN_COMMANDS +from .api import LocalScheduler -from .links import job_list - logger = logging.getLogger(__name__) -# TODO: shutdown scheduler on pre_syncdb to avoid accessing non existing models - -@receiver(post_syncdb, dispatch_uid='scheduler_shutdown_post_syncdb') -def scheduler_shutdown_post_syncdb(sender, **kwargs): - logger.debug('Scheduler shut down on post syncdb signal') - scheduler.shutdown() - - 
-@receiver(pre_collectstatic, dispatch_uid='sheduler_shutdown_pre_collectstatic') -def sheduler_shutdown_pre_collectstatic(sender, **kwargs): - logger.debug('Scheduler shut down on collectstatic signal') - scheduler.shutdown() - - -@receiver(pre_migrate, dispatch_uid='sheduler_shutdown_pre_migrate') -def sheduler_shutdown_pre_migrate(sender, **kwargs): - logger.debug('Scheduler shut down on pre_migrate signal') - scheduler.shutdown() - - def schedule_shutdown_on_exit(): - logger.debug('Scheduler shut down on exit') - scheduler.shutdown() + logger.debug('Schedulers shut down on exit') + LocalScheduler.shutdown_all() -register_tool(job_list) +if any([command in sys.argv for command in SHUTDOWN_COMMANDS]): + logger.debug('Schedulers shut down on SHUTDOWN_COMMAND') + # Shutdown any scheduler already running + LocalScheduler.shutdown_all() + # Prevent any new scheduler afterwards to start + LocalScheduler.lockdown() + +register_tool(scheduler_tool_link) atexit.register(schedule_shutdown_on_exit) +bind_links([LocalScheduler, 'scheduler_list', 'job_list'], scheduler_list, menu_name='secondary_menu') +bind_links([LocalScheduler], job_list) diff --git a/apps/scheduler/api.py b/apps/scheduler/api.py index 6ce39bc3c0..aeebd6fbec 100644 --- a/apps/scheduler/api.py +++ b/apps/scheduler/api.py @@ -1,30 +1,147 @@ from __future__ import absolute_import -from .runtime import scheduler -from .exceptions import AlreadyScheduled +import logging -registered_jobs = {} +from apscheduler.scheduler import Scheduler as OriginalScheduler + +from django.utils.translation import ugettext_lazy as _ + +from .exceptions import AlreadyScheduled, UnknownJobClass + +logger = logging.getLogger(__name__) -def register_interval_job(name, title, func, weeks=0, days=0, hours=0, minutes=0, - seconds=0, start_date=None, args=None, - kwargs=None, job_name=None, **options): +class SchedulerJobBase(object): + job_type = u'' - if name in registered_jobs: - raise AlreadyScheduled + def __init__(self, name, 
label, function, *args, **kwargs): + self.scheduler = None + self.name = name + self.label = label + self.function = function + self.args = args + self.kwargs = kwargs - job = scheduler.add_interval_job(func=func, weeks=weeks, days=days, - hours=hours, minutes=minutes, seconds=seconds, - start_date=start_date, args=args, kwargs=kwargs, **options) + def stop(self): + self.scheduler.stop_job(self) - registered_jobs[name] = {'title': title, 'job': job} + @property + def running(self): + if self.scheduler: + return self.scheduler.running + else: + return False + + @property + def start_date(self): + return self._job.trigger.start_date -def remove_job(name): - if name in registered_jobs: - scheduler.unschedule_job(registered_jobs[name]['job']) - registered_jobs.pop(name) +class IntervalJob(SchedulerJobBase): + job_type = _(u'Interval job') + + def start(self, scheduler): + scheduler.add_job(self) -def get_job_list(): - return registered_jobs.values() +class DateJob(SchedulerJobBase): + job_type = _(u'Date job') + + def start(self, scheduler): + scheduler.add_job(self) + + +class LocalScheduler(object): + scheduler_registry = {} + lockdown = False + + @classmethod + def get(cls, name): + return cls.scheduler_registry[name] + + @classmethod + def get_all(cls): + return cls.scheduler_registry.values() + + @classmethod + def shutdown_all(cls): + for scheduler in cls.scheduler_registry.values(): + scheduler.stop() + + @classmethod + def lockdown(cls): + cls.lockdown = True + + def __init__(self, name, label=None): + self.scheduled_jobs = {} + self._scheduler = None + self.name = name + self.label = label + self.__class__.scheduler_registry[self.name] = self + + def start(self): + logger.debug('starting scheduler: %s' % self.name) + if not self.__class__.lockdown: + self._scheduler = OriginalScheduler() + for job in self.scheduled_jobs.values(): + self._schedule_job(job) + + self._scheduler.start() + else: + logger.debug('lockdown in effect') + + def stop(self): + if 
self._scheduler: + self._scheduler.shutdown() + del self._scheduler + self._scheduler = None + + @property + def running(self): + if self._scheduler: + return self._scheduler.running + else: + return False + + def clear(self): + for job in self.scheduled_jobs.values(): + self.stop_job(job) + + def stop_job(self, job): + self._scheduler.unschedule_job(job._job) + del(self.scheduled_jobs[job.name]) + job.scheduler = None + + def _schedule_job(self, job): + if isinstance(job, IntervalJob): + job._job = self._scheduler.add_interval_job(job.function, *job.args, **job.kwargs) + elif isinstance(job, DateJob): + job._job = self._scheduler.add_date_job(job.function, *job.args, **job.kwargs) + else: + raise UnknownJobClass + + def add_job(self, job): + if job.scheduler or job.name in self.scheduled_jobs.keys(): + raise AlreadyScheduled + + if self._scheduler: + self._scheduler_job(job) + + job.scheduler = self + self.scheduled_jobs[job.name] = job + + def add_interval_job(self, name, label, function, *args, **kwargs): + job = IntervalJob(name=name, label=label, function=function, *args, **kwargs) + self.add_job(job) + return job + + def add_date_job(self, name, label, function, *args, **kwargs): + job = DateJob(name=name, label=label, function=function, *args, **kwargs) + self.add_job(job) + return job + + def get_job_list(self): + return self.scheduled_jobs.values() + + def __unicode__(self): + return unicode(self.label or self.name) diff --git a/apps/scheduler/exceptions.py b/apps/scheduler/exceptions.py index f30d9fe815..6d6515a76c 100644 --- a/apps/scheduler/exceptions.py +++ b/apps/scheduler/exceptions.py @@ -1,2 +1,14 @@ class AlreadyScheduled(Exception): + """ + Raised when trying to schedule a Job instance of anything after it was + already scheduled in any other scheduler + """ + pass + + +class UnknownJobClass(Exception): + """ + Raised when trying to schedule a Job that is not of a a type: + IntervalJob or DateJob + """ pass diff --git a/apps/scheduler/links.py 
b/apps/scheduler/links.py index 7808f3331c..9ad1a1ff10 100644 --- a/apps/scheduler/links.py +++ b/apps/scheduler/links.py @@ -4,6 +4,8 @@ from django.utils.translation import ugettext_lazy as _ from navigation.api import Link -from .permissions import PERMISSION_VIEW_JOB_LIST +from .permissions import PERMISSION_VIEW_JOB_LIST, PERMISSION_VIEW_SCHEDULER_LIST -job_list = Link(text=_(u'interval job list'), view='job_list', icon='time.png', permissions=[PERMISSION_VIEW_JOB_LIST]) +scheduler_tool_link = Link(text=_(u'local schedulers'), view='scheduler_list', icon='time.png', permissions=[PERMISSION_VIEW_SCHEDULER_LIST]) +scheduler_list = Link(text=_(u'scheduler list'), view='scheduler_list', sprite='time', permissions=[PERMISSION_VIEW_SCHEDULER_LIST]) +job_list = Link(text=_(u'interval job list'), view='job_list', args='object.name', sprite='timeline_marker', permissions=[PERMISSION_VIEW_JOB_LIST]) diff --git a/apps/scheduler/literals.py b/apps/scheduler/literals.py new file mode 100644 index 0000000000..b56b5148d7 --- /dev/null +++ b/apps/scheduler/literals.py @@ -0,0 +1 @@ +SHUTDOWN_COMMANDS = ['syncdb', 'migrate', 'schemamigration', 'datamigration', 'collectstatic', 'shell', 'shell_plus'] diff --git a/apps/scheduler/permissions.py b/apps/scheduler/permissions.py index 203f675ff4..2ef2343811 100644 --- a/apps/scheduler/permissions.py +++ b/apps/scheduler/permissions.py @@ -5,4 +5,5 @@ from django.utils.translation import ugettext_lazy as _ from permissions.models import PermissionNamespace, Permission namespace = PermissionNamespace('scheduler', _(u'Scheduler')) -PERMISSION_VIEW_JOB_LIST = Permission.objects.register(namespace, 'jobs_list', _(u'View the interval job list')) +PERMISSION_VIEW_SCHEDULER_LIST = Permission.objects.register(namespace, 'schedulers_list', _(u'View the local scheduler list')) +PERMISSION_VIEW_JOB_LIST = Permission.objects.register(namespace, 'jobs_list', _(u'View the local scheduler job list')) diff --git a/apps/scheduler/runtime.py 
b/apps/scheduler/runtime.py deleted file mode 100644 index a9440e946b..0000000000 --- a/apps/scheduler/runtime.py +++ /dev/null @@ -1,4 +0,0 @@ -from apscheduler.scheduler import Scheduler - -scheduler = Scheduler() -scheduler.start() diff --git a/apps/scheduler/urls.py b/apps/scheduler/urls.py index fde9602994..3630a34bbc 100644 --- a/apps/scheduler/urls.py +++ b/apps/scheduler/urls.py @@ -1,5 +1,6 @@ from django.conf.urls.defaults import patterns, url urlpatterns = patterns('scheduler.views', - url(r'^list/$', 'job_list', (), 'job_list'), + url(r'^scheduler/list/$', 'scheduler_list', (), 'scheduler_list'), + url(r'^scheduler/(?P\w+)/job/list/$', 'job_list', (), 'job_list'), ) diff --git a/apps/scheduler/views.py b/apps/scheduler/views.py index 597632eca7..fb622ce818 100644 --- a/apps/scheduler/views.py +++ b/apps/scheduler/views.py @@ -3,32 +3,67 @@ from __future__ import absolute_import from django.shortcuts import render_to_response from django.template import RequestContext from django.utils.translation import ugettext_lazy as _ +from django.http import Http404 from permissions.models import Permission -from common.utils import encapsulate -from .permissions import PERMISSION_VIEW_JOB_LIST -from .api import get_job_list +from .permissions import PERMISSION_VIEW_SCHEDULER_LIST, PERMISSION_VIEW_JOB_LIST +from .api import LocalScheduler -def job_list(request): - Permission.objects.check_permissions(request.user, [PERMISSION_VIEW_JOB_LIST]) +def scheduler_list(request): + Permission.objects.check_permissions(request.user, [PERMISSION_VIEW_SCHEDULER_LIST]) context = { - 'object_list': get_job_list(), - 'title': _(u'interval jobs'), + 'object_list': LocalScheduler.get_all(), + 'title': _(u'local schedulers'), 'extra_columns': [ + { + 'name': _(u'name'), + 'attribute': 'name' + }, { 'name': _(u'label'), - 'attribute': encapsulate(lambda job: job['title']) + 'attribute': 'label' + }, + { + 'name': _(u'running'), + 'attribute': 'running' + }, + ], + 'hide_object': 
True, + } + + return render_to_response('generic_list.html', context, + context_instance=RequestContext(request)) + + +def job_list(request, scheduler_name): + Permission.objects.check_permissions(request.user, [PERMISSION_VIEW_JOB_LIST]) + try: + scheduler = LocalScheduler.get(scheduler_name) + except: + raise Http404 + + context = { + 'object_list': scheduler.get_job_list(), + 'title': _(u'local jobs in scheduler: %s') % scheduler, + 'extra_columns': [ + { + 'name': _(u'name'), + 'attribute': 'name' + }, + { + 'name': _(u'label'), + 'attribute': 'label' }, { 'name': _(u'start date time'), - 'attribute': encapsulate(lambda job: job['job'].trigger.start_date) + 'attribute': 'start_date' }, { - 'name': _(u'interval'), - 'attribute': encapsulate(lambda job: job['job'].trigger.interval) + 'name': _(u'type'), + 'attribute': 'job_type' }, ], 'hide_object': True, From 344e738c48475271944a00eedbefda7054ed3ac2 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 1 Aug 2012 01:43:00 -0400 Subject: [PATCH 36/40] Update apps interface for the new LocalScheduler class --- apps/checkouts/__init__.py | 9 ++++++--- apps/checkouts/literals.py | 2 ++ apps/clustering/__init__.py | 8 +++++--- apps/dynamic_search/__init__.py | 9 +++++---- apps/job_processor/__init__.py | 7 +++++-- apps/ocr/__init__.py | 1 - apps/sources/__init__.py | 8 +++++--- apps/sources/models.py | 17 +++++++++-------- 8 files changed, 37 insertions(+), 24 deletions(-) diff --git a/apps/checkouts/__init__.py b/apps/checkouts/__init__.py index f176cfa01a..1991c69cf1 100644 --- a/apps/checkouts/__init__.py +++ b/apps/checkouts/__init__.py @@ -3,7 +3,7 @@ from __future__ import absolute_import from django.utils.translation import ugettext_lazy as _ from navigation.api import bind_links, register_top_menu -from scheduler.api import register_interval_job +from scheduler.api import LocalScheduler from documents.models import Document from acls.api import class_permissions @@ -14,6 +14,7 @@ from .permissions import 
(PERMISSION_DOCUMENT_CHECKOUT, from .links import checkout_list, checkout_document, checkout_info, checkin_document from .models import DocumentCheckout from .tasks import task_check_expired_check_outs +from .literals import CHECK_EXPIRED_CHECK_OUTS_INTERVAL def initialize_document_checkout_extra_methods(): @@ -34,6 +35,8 @@ class_permissions(Document, [ PERMISSION_DOCUMENT_RESTRICTIONS_OVERRIDE ]) -CHECK_EXPIRED_CHECK_OUTS_INTERVAL = 60 # Lowest check out expiration allowed -register_interval_job('task_check_expired_check_outs', _(u'Check expired check out documents and checks them in.'), task_check_expired_check_outs, seconds=CHECK_EXPIRED_CHECK_OUTS_INTERVAL) +checkouts_scheduler = LocalScheduler('checkouts', _(u'Document checkouts')) +checkouts_scheduler.add_interval_job('task_check_expired_check_outs', _(u'Check expired check out documents and checks them in.'), task_check_expired_check_outs, seconds=CHECK_EXPIRED_CHECK_OUTS_INTERVAL) +checkouts_scheduler.start() + initialize_document_checkout_extra_methods() diff --git a/apps/checkouts/literals.py b/apps/checkouts/literals.py index 23e9920984..22ac3c1279 100644 --- a/apps/checkouts/literals.py +++ b/apps/checkouts/literals.py @@ -14,3 +14,5 @@ STATE_LABELS = { STATE_CHECKED_OUT: _(u'checked out'), STATE_CHECKED_IN: _(u'checked in/available'), } + +CHECK_EXPIRED_CHECK_OUTS_INTERVAL = 60 # Lowest check out expiration allowed diff --git a/apps/clustering/__init__.py b/apps/clustering/__init__.py index f5b453f6d2..66a8549c26 100644 --- a/apps/clustering/__init__.py +++ b/apps/clustering/__init__.py @@ -2,7 +2,7 @@ from __future__ import absolute_import from django.utils.translation import ugettext_lazy as _ -from scheduler.api import register_interval_job +from scheduler.api import LocalScheduler from navigation.api import bind_links from project_tools.api import register_tool @@ -10,8 +10,10 @@ from .tasks import node_heartbeat, house_keeping from .links import tool_link, node_list from .models import Node, 
ClusteringConfig -register_interval_job('node_heartbeat', _(u'Update a node\'s properties.'), node_heartbeat, seconds=ClusteringConfig.get().node_heartbeat_interval) -register_interval_job('house_keeping', _(u'Check for unresponsive nodes in the cluster list.'), house_keeping, seconds=1) +clustering_scheduler = LocalScheduler('clustering', _(u'Clustering')) +clustering_scheduler.add_interval_job('node_heartbeat', _(u'Update a node\'s properties.'), node_heartbeat, seconds=ClusteringConfig.get().node_heartbeat_interval) +clustering_scheduler.add_interval_job('house_keeping', _(u'Check for unresponsive nodes in the cluster list.'), house_keeping, seconds=1) +clustering_scheduler.start() register_tool(tool_link) bind_links([Node, 'node_list'], [node_list], menu_name='secondary_menu') diff --git a/apps/dynamic_search/__init__.py b/apps/dynamic_search/__init__.py index 55718876be..ae5422c437 100644 --- a/apps/dynamic_search/__init__.py +++ b/apps/dynamic_search/__init__.py @@ -9,9 +9,8 @@ from django.core.management import call_command from navigation.api import register_sidebar_template, bind_links, Link from documents.models import Document -from scheduler.runtime import scheduler +from scheduler.api import LocalScheduler from signaler.signals import post_update_index, pre_update_index -from scheduler.api import register_interval_job from lock_manager import Lock, LockError from .models import IndexableObject @@ -36,7 +35,7 @@ def scheduler_shutdown_pre_update_index(sender, mayan_runtime, **kwargs): # Only shutdown the scheduler if the command is called from the command # line if not mayan_runtime: - scheduler.shutdown() + LocalScheduler.shutdown_all() def search_index_update(): @@ -61,4 +60,6 @@ def search_index_update(): bind_links(['search', 'search_advanced', 'results'], [search], menu_name='form_header') bind_links(['results'], [search_again], menu_name='sidebar') -register_interval_job('search_index_update', _(u'Update the search index with the most recent 
modified documents.'), search_index_update, seconds=INDEX_UPDATE_INTERVAL) +dynamic_search_scheduler = LocalScheduler('search', _(u'Search')) +dynamic_search_scheduler.add_interval_job('search_index_update', _(u'Update the search index with the most recent modified documents.'), search_index_update, seconds=INDEX_UPDATE_INTERVAL) +dynamic_search_scheduler.start() diff --git a/apps/job_processor/__init__.py b/apps/job_processor/__init__.py index b66ada46cf..66fb010bff 100644 --- a/apps/job_processor/__init__.py +++ b/apps/job_processor/__init__.py @@ -2,7 +2,7 @@ from __future__ import absolute_import from django.utils.translation import ugettext_lazy as _ -from scheduler.api import register_interval_job +from scheduler.api import LocalScheduler from navigation.api import bind_links, register_model_list_columns from project_tools.api import register_tool from common.utils import encapsulate @@ -14,9 +14,12 @@ from .tasks import job_queue_poll from .links import (node_workers, job_queues, tool_link, job_queue_items_pending, job_queue_items_error, job_queue_items_active) +#TODO: fix this, make it cluster wide JOB_QUEUE_POLL_INTERVAL = 1 -register_interval_job('job_queue_poll', _(u'Poll a job queue for pending jobs.'), job_queue_poll, seconds=JOB_QUEUE_POLL_INTERVAL) +job_processor_scheduler = LocalScheduler('job_processor', _(u'Job processor')) +job_processor_scheduler.add_interval_job('job_queue_poll', _(u'Poll a job queue for pending jobs.'), job_queue_poll, seconds=JOB_QUEUE_POLL_INTERVAL) +job_processor_scheduler.start() register_tool(tool_link) bind_links([JobQueue, 'job_queues'], [job_queues], menu_name='secondary_menu') diff --git a/apps/ocr/__init__.py b/apps/ocr/__init__.py index 4b04ad41f7..b5447d8fa7 100644 --- a/apps/ocr/__init__.py +++ b/apps/ocr/__init__.py @@ -15,7 +15,6 @@ from documents.models import Document, DocumentVersion from maintenance.api import register_maintenance_links from project_tools.api import register_tool from acls.api import 
class_permissions -from scheduler.api import register_interval_job from statistics.api import register_statistics from job_processor.models import JobQueue, JobType from job_processor.exceptions import JobQueuePushError diff --git a/apps/sources/__init__.py b/apps/sources/__init__.py index 4daa0ccd04..6ed77c392b 100644 --- a/apps/sources/__init__.py +++ b/apps/sources/__init__.py @@ -6,7 +6,7 @@ from navigation.api import (bind_links, register_model_list_columns) from common.utils import encapsulate from project_setup.api import register_setup -from scheduler.api import register_interval_job +from scheduler.api import LocalScheduler from documents.models import Document from .staging import StagingFile @@ -62,8 +62,10 @@ register_model_list_columns(StagingFile, [ register_setup(setup_sources) -register_interval_job('task_fetch_pop3_emails', _(u'Connects to the POP3 email sources and fetches the attached documents.'), task_fetch_pop3_emails, seconds=EMAIL_PROCESSING_INTERVAL) -register_interval_job('task_fetch_imap_emails', _(u'Connects to the IMAP email sources and fetches the attached documents.'), task_fetch_imap_emails, seconds=EMAIL_PROCESSING_INTERVAL) +sources_scheduler = LocalScheduler('sources', _(u'Document sources')) +sources_scheduler.add_interval_job('task_fetch_pop3_emails', _(u'Connects to the POP3 email sources and fetches the attached documents.'), task_fetch_pop3_emails, seconds=EMAIL_PROCESSING_INTERVAL) +sources_scheduler.add_interval_job('task_fetch_imap_emails', _(u'Connects to the IMAP email sources and fetches the attached documents.'), task_fetch_imap_emails, seconds=EMAIL_PROCESSING_INTERVAL) +sources_scheduler.start() bind_links(['document_list_recent', 'document_list', 'document_create', 'document_create_multiple', 'upload_interactive', 'staging_file_delete'], [document_create_multiple], menu_name='secondary_menu') bind_links([Document], [document_create_multiple], menu_name='secondary_menu') diff --git a/apps/sources/models.py 
b/apps/sources/models.py index 31b89b66f0..9c779eebe7 100644 --- a/apps/sources/models.py +++ b/apps/sources/models.py @@ -27,7 +27,6 @@ from converter.literals import DIMENSION_SEPARATOR from documents.models import Document, DocumentType from documents.events import history_document_created from metadata.api import save_metadata_list -from scheduler.api import register_interval_job, remove_job from acls.utils import apply_default_acls from .managers import SourceTransformationManager, SourceLogManager @@ -43,6 +42,7 @@ from .literals import (SOURCE_CHOICES, SOURCE_CHOICES_PLURAL, IMAP_DEFAULT_MAILBOX) from .compressed_file import CompressedFile, NotACompressedFile from .conf.settings import POP3_TIMEOUT +#from . import sources_scheduler logger = logging.getLogger(__name__) @@ -441,17 +441,18 @@ class WatchFolder(BaseModel): interval = models.PositiveIntegerField(verbose_name=_(u'interval'), help_text=_(u'Inverval in seconds where the watch folder path is checked for new documents.')) def save(self, *args, **kwargs): - if self.pk: - remove_job(self.internal_name()) + #if self.pk: + # remove_job(self.internal_name()) super(WatchFolder, self).save(*args, **kwargs) self.schedule() def schedule(self): - if self.enabled: - register_interval_job(self.internal_name(), - title=self.fullname(), func=self.execute, - kwargs={'source_id': self.pk}, seconds=self.interval - ) + pass + #if self.enabled: + # sources_scheduler.add_interval_job(self.internal_name(), + # title=self.fullname(), function=self.execute, + # seconds=self.interval, kwargs={'source_id': self.pk} + # ) def execute(self, source_id): source = WatchFolder.objects.get(pk=source_id) From 8f08296e7b10931cf4c2563fbc03c86af6cf60a1 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 1 Aug 2012 01:44:55 -0400 Subject: [PATCH 37/40] Update bind_links to accept single links as well as lists for the links argument --- apps/navigation/api.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git 
a/apps/navigation/api.py b/apps/navigation/api.py index e21f6b1050..42c7aa16ec 100644 --- a/apps/navigation/api.py +++ b/apps/navigation/api.py @@ -158,7 +158,11 @@ def bind_links(sources, links, menu_name=None, position=0): bound_links.setdefault(menu_name, {}) for source in sources: bound_links[menu_name].setdefault(source, {'links': []}) - bound_links[menu_name][source]['links'].extend(links) + try: + bound_links[menu_name][source]['links'].extend(links) + except TypeError: + # Try to see if links is a single link + bound_links[menu_name][source]['links'].append(links) def register_top_menu(name, link, position=None): From 8f714a9fa90ae83683395307cee720a38329b43f Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 1 Aug 2012 01:46:09 -0400 Subject: [PATCH 38/40] Add reminder --- apps/clustering/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/clustering/models.py b/apps/clustering/models.py index 764691aadc..b083829f74 100644 --- a/apps/clustering/models.py +++ b/apps/clustering/models.py @@ -78,6 +78,7 @@ class ClusteringConfigManager(models.Manager): class ClusteringConfig(Singleton): node_time_to_live = models.PositiveIntegerField(verbose_name=(u'time to live (in seconds)'), default=DEFAULT_NODE_TTL) # After this time a worker is considered dead node_heartbeat_interval = models.PositiveIntegerField(verbose_name=(u'heartbeat interval'), default=DEFAULT_NODE_HEARTBEAT_INTERVAL) + # TODO: add validation, interval cannot be greater than TTL objects = ClusteringConfigManager() From 74b87026cabefecdb0e8fda32c25679f76f3ce8b Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 1 Aug 2012 01:46:26 -0400 Subject: [PATCH 39/40] Make a node worker count appear after it's main list view attributes --- apps/clustering/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/clustering/views.py b/apps/clustering/views.py index efaafe1ad5..e00f7b4da2 100644 --- a/apps/clustering/views.py +++ b/apps/clustering/views.py @@ 
-22,7 +22,7 @@ def node_list(request): context = { 'object_list': Node.objects.all(), 'title': _(u'nodes'), - 'extra_columns': [ + 'extra_columns_preffixed': [ { 'name': _(u'hostname'), 'attribute': 'hostname', From cdb6e7c6c68ef71c9058e890295a62e03445d6fe Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 1 Aug 2012 01:47:08 -0400 Subject: [PATCH 40/40] Spelling update --- apps/job_processor/links.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/job_processor/links.py b/apps/job_processor/links.py index 222c95d04c..a3fffee82b 100644 --- a/apps/job_processor/links.py +++ b/apps/job_processor/links.py @@ -12,7 +12,7 @@ from .permissions import PERMISSION_JOB_QUEUE_VIEW node_workers = Link(text=_(u'workers'), view='node_workers', args='object.pk', sprite='lorry_go', permissions=[PERMISSION_NODES_VIEW]) tool_link = Link(text=_(u'job queues'), view='job_queues', icon='hourglass.png', permissions=[PERMISSION_JOB_QUEUE_VIEW]) -job_queues = Link(text=_(u'job queues list'), view='job_queues', sprite='hourglass', permissions=[PERMISSION_JOB_QUEUE_VIEW]) +job_queues = Link(text=_(u'job queue list'), view='job_queues', sprite='hourglass', permissions=[PERMISSION_JOB_QUEUE_VIEW]) job_queue_items_pending = Link(text=_(u'pending jobs'), view='job_queue_items_pending', args='object.pk', sprite='text_list_bullets', permissions=[PERMISSION_JOB_QUEUE_VIEW]) job_queue_items_error = Link(text=_(u'error jobs'), view='job_queue_items_error', args='object.pk', sprite='text_list_bullets', permissions=[PERMISSION_JOB_QUEUE_VIEW]) job_queue_items_active = Link(text=_(u'active jobs'), view='job_queue_items_active', args='object.pk', sprite='text_list_bullets', permissions=[PERMISSION_JOB_QUEUE_VIEW])