100 lines
3.5 KiB
Python
100 lines
3.5 KiB
Python
from __future__ import absolute_import
|
|
|
|
import atexit
|
|
import logging
|
|
import psutil
|
|
from multiprocessing import active_children
|
|
|
|
from django.db import transaction, DatabaseError
|
|
from django.dispatch import receiver
|
|
from django.utils.translation import ugettext_lazy as _
|
|
|
|
#from backups.api import AppBackup, ModelBackup
|
|
#from app_registry import register_app, UnableToRegister
|
|
from scheduler.api import LocalScheduler
|
|
from navigation.api import bind_links, register_model_list_columns
|
|
from project_tools.api import register_tool
|
|
from project_setup.api import register_setup
|
|
from common.utils import encapsulate
|
|
|
|
from clustering.models import Node
|
|
from clustering.signals import node_died, node_heartbeat
|
|
|
|
from .models import JobQueue, JobProcessingConfig, JobQueueItem, Worker
|
|
from .tasks import job_queue_poll, house_keeping
|
|
from .links import (node_workers, job_queues, tool_link,
|
|
job_queue_items_pending, job_queue_items_error, job_queue_items_active,
|
|
job_queue_config_edit, setup_link, job_queue_start, job_queue_stop,
|
|
job_requeue, job_delete, worker_terminate)
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@transaction.commit_on_success
def add_job_queue_jobs():
    """
    Create the local 'job_processor' scheduler, register its periodic
    jobs and start it.

    Two interval jobs are registered:

    * ``job_queue_poll``, every ``job_queue_poll_interval`` seconds.
    * ``house_keeping``, every ``dead_job_removal_interval`` seconds.

    Both intervals are read from ``JobProcessingConfig``. If reading the
    configuration or registering a job raises ``DatabaseError``, the
    transaction is rolled back, the failure is logged, and the scheduler
    is still started with whatever jobs were registered before the error.
    NOTE(review): the error is presumably expected when the tables do not
    exist yet (e.g. before the initial database sync) -- confirm.
    """
    job_processor_scheduler = LocalScheduler('job_processor', _(u'Job processor'))

    try:
        # Read the configuration once instead of once per registered job.
        config = JobProcessingConfig.get()
        job_processor_scheduler.add_interval_job(
            'job_queue_poll',
            _(u'Poll a job queue for pending jobs.'),
            job_queue_poll,
            seconds=config.job_queue_poll_interval
        )
        # This job used to carry a copy of the polling job's description.
        job_processor_scheduler.add_interval_job(
            'house_keeping',
            _(u'Perform house keeping (dead job removal).'),
            house_keeping,
            seconds=config.dead_job_removal_interval
        )
    except DatabaseError as exception:
        transaction.rollback()
        # Best effort: keep going, but leave a trace of what went wrong.
        logger.warning(
            'unable to add the job processor scheduled jobs: %s', exception
        )

    job_processor_scheduler.start()
|
|
|
|
|
|
# Register the scheduled jobs and start the scheduler at import time.
add_job_queue_jobs()

# Register this app's tool and setup links.
register_tool(tool_link)
register_setup(setup_link)

# Bind navigation links to the models / view names they are shown for.
bind_links(
    [JobQueue, 'job_queues'],
    [job_queues],
    menu_name='secondary_menu'
)
bind_links(
    [JobQueue],
    [
        job_queue_start,
        job_queue_stop,
        job_queue_items_pending,
        job_queue_items_active,
        job_queue_items_error,
    ]
)
bind_links([Node], [node_workers])
bind_links(
    ['job_queue_config_edit'],
    [job_queue_config_edit],
    menu_name='secondary_menu'
)
bind_links([JobQueueItem], [job_requeue, job_delete])
bind_links([Worker], [worker_terminate])

# Give Node a workers() method returning the node's related worker set.
Node.add_to_class('workers', lambda node: node.worker_set)

# Extra column for Node lists: the number of workers attached to the node.
register_model_list_columns(
    Node,
    [
        {
            'name': _(u'active workers'),
            'attribute': encapsulate(
                lambda node: node.workers().all().count()
            ),
        },
    ]
)
|
|
|
|
|
|
@receiver(node_died, dispatch_uid='process_dead_workers')
def process_dead_workers(sender, node, **kwargs):
    """
    Signal handler for ``node_died``: clean up the workers of a dead node.

    Every worker related to ``node`` is deleted. When a worker still holds
    a job queue item, that item is first requeued with ``force=True`` and
    ``at_top=True``.

    sender -- the signal sender (unused).
    node -- the node whose workers are to be cleaned up.
    """
    logger.debug('received signal')

    for worker in node.worker_set.all():
        orphaned_item = worker.job_queue_item
        if orphaned_item:
            # Hand the job back to its queue before dropping the worker.
            orphaned_item.requeue(force=True, at_top=True)

        worker.delete()
|
|
|
|
|
|
@receiver(node_heartbeat, dispatch_uid='node_processes')
def node_processes(sender, node, **kwargs):
    """
    Signal handler for ``node_heartbeat``: remove the node's stale workers.

    A worker is considered stale when its ``pid`` is not among the PIDs
    psutil reports as currently running. Stale workers are deleted; when
    one still holds a job queue item, that item is first requeued with
    ``force=True`` and ``at_top=True``.

    sender -- the signal sender (unused).
    node -- the node whose workers are checked.
    NOTE(review): the PID list comes from the local machine, so this
    assumes ``node`` is the node this process runs on -- confirm.
    """
    logger.debug('update current node\'s processes')

    # psutil 2.0 renamed get_pid_list() to pids() and later dropped the old
    # name; prefer the new API and fall back for old installations.
    try:
        list_pids = psutil.pids
    except AttributeError:
        list_pids = psutil.get_pid_list
    all_active_pids = list_pids()

    # Remove stale workers: those whose pid is no longer running at all
    # (the check is against every PID on the machine, not only children).
    for dead_worker in node.worker_set.exclude(pid__in=all_active_pids):
        if dead_worker.job_queue_item:
            dead_worker.job_queue_item.requeue(force=True, at_top=True)
        dead_worker.delete()
|
|
|
|
|
|
def kill_all_node_processes():
    """
    Terminate every live child process of the current process.

    Each child reported by ``multiprocessing.active_children()`` is sent
    ``terminate()`` and then joined before the next one is handled.
    """
    logger.debug("terminating this node's all processes")

    for child in active_children():
        child.terminate()
        # Wait for the child to actually exit before moving on.
        child.join()
|
|
|
|
|
|
# Terminate any remaining child processes when the interpreter exits.
atexit.register(kill_all_node_processes)
|
|
|
|
#try:
|
|
# app = register_app('job_processor', _(u'Job processor'))
|
|
#except UnableToRegister:
|
|
# pass
|
|
#else:
|
|
# AppBackup(app, [ModelBackup()])
|