Add an explicit heartbeat update method, only check healthy nodes for heartbeat timeout, and make 'dead' the default node state on creation
This commit is contained in:
@@ -13,5 +13,5 @@ NODE_STATE_CHOICES = (
|
||||
)
|
||||
|
||||
DEFAULT_JOB_QUEUE_POLL_INTERVAL = 2
|
||||
DEFAULT_NODE_CPU_LOAD = 100
|
||||
DEFAULT_NODE_MEMORY_USAGE = 100
|
||||
DEFAULT_NODE_CPU_LOAD = 0
|
||||
DEFAULT_NODE_MEMORY_USAGE = 0
|
||||
|
||||
@@ -13,7 +13,9 @@ from django.utils.translation import ugettext, ugettext_lazy as _
|
||||
from common.models import Singleton
|
||||
|
||||
from .literals import (DEFAULT_NODE_HEARTBEAT_INTERVAL, DEFAULT_NODE_HEARTBEAT_TIMEOUT,
|
||||
DEFAULT_DEAD_NODE_REMOVAL_INTERVAL, NODE_STATE_HEALTHY, NODE_STATE_CHOICES, NODE_STATE_DEAD)
|
||||
DEFAULT_DEAD_NODE_REMOVAL_INTERVAL, NODE_STATE_HEALTHY, NODE_STATE_CHOICES, NODE_STATE_DEAD,
|
||||
DEFAULT_NODE_CPU_LOAD, DEFAULT_NODE_MEMORY_USAGE)
|
||||
from .signals import node_died
|
||||
|
||||
|
||||
class NodeManager(models.Manager):
|
||||
@@ -28,12 +30,12 @@ class NodeManager(models.Manager):
|
||||
|
||||
class Node(models.Model):
|
||||
hostname = models.CharField(max_length=255, verbose_name=_(u'hostname'))
|
||||
cpuload = models.FloatField(blank=True, default=0.0, verbose_name=_(u'cpu load'))
|
||||
cpuload = models.FloatField(blank=True, default=DEFAULT_NODE_CPU_LOAD, verbose_name=_(u'cpu load'))
|
||||
# Pass the callable itself, not its result: ``now()`` would be evaluated once
# at import time and every new row would share that stale timestamp.
heartbeat = models.DateTimeField(blank=True, default=datetime.datetime.now, verbose_name=_(u'last heartbeat check'))
|
||||
memory_usage = models.FloatField(blank=True, default=0.0, verbose_name=_(u'memory usage'))
|
||||
memory_usage = models.FloatField(blank=True, default=DEFAULT_NODE_MEMORY_USAGE, verbose_name=_(u'memory usage'))
|
||||
state = models.CharField(max_length=4,
|
||||
choices=NODE_STATE_CHOICES,
|
||||
default=NODE_STATE_HEALTHY,
|
||||
default=NODE_STATE_DEAD,
|
||||
verbose_name=_(u'state'))
|
||||
|
||||
objects = NodeManager()
|
||||
@@ -50,17 +52,24 @@ class Node(models.Model):
|
||||
|
||||
def refresh(self):
    """Update this node's load figures from the local platform.

    Does nothing unless this record's hostname matches the machine running
    the code, so a node can only ever update its own entry.
    """
    if self.hostname != platform.node():
        # Some other machine's record: leave it untouched.
        return

    platform_stats = Node.platform_info()
    self.cpuload = platform_stats['cpuload']
    self.memory_usage = platform_stats['memory_usage']
|
||||
|
||||
def if_healthy(self):
|
||||
return self.health == NODE_STATE_HEALTHY
|
||||
def is_healthy(self):
    """Return True when this node's state equals NODE_STATE_HEALTHY."""
    current_state = self.state
    return current_state == NODE_STATE_HEALTHY
|
||||
|
||||
def save(self, *args, **kwargs):
|
||||
def mark_as_dead(self):
    """Flag this node as dead, persist the change and send ``node_died``.

    The state is saved before the signal is sent so that receivers which
    query the database already see the node as dead, and so that a receiver
    raising an exception cannot prevent the new state from being stored.
    """
    self.state = NODE_STATE_DEAD
    self.save()
    # ``sender`` stays the instance for backward compatibility with existing
    # receivers; ``node`` is also passed explicitly as a keyword argument.
    node_died.send(sender=self, node=self)
|
||||
|
||||
def send_heartbeat(self):
|
||||
self.refresh()
|
||||
self.state=NODE_STATE_HEALTHY
|
||||
self.heartbeat = datetime.datetime.now()
|
||||
return super(Node, self).save(*args, **kwargs)
|
||||
self.save()
|
||||
|
||||
class Meta:
|
||||
verbose_name = _(u'node')
|
||||
@@ -69,10 +78,11 @@ class Node(models.Model):
|
||||
|
||||
class ClusteringConfigManager(models.Manager):
|
||||
def dead_nodes(self):
|
||||
return self.model.objects.filter(heartbeat__lt=datetime.datetime.now() - datetime.timedelta(seconds=self.model.get().node_heartbeat_timeout))
|
||||
return Node.objects.filter(state=NODE_STATE_HEALTHY).filter(heartbeat__lt=datetime.datetime.now() - datetime.timedelta(seconds=self.model.get().node_heartbeat_timeout))
|
||||
|
||||
def check_dead_nodes(self):
|
||||
self.dead_nodes().update(healty=NODE_STATE_DEAD)
|
||||
for node in self.dead_nodes():
|
||||
node.mark_as_dead()
|
||||
|
||||
def zombiest_node(self):
|
||||
try:
|
||||
@@ -86,7 +96,7 @@ class ClusteringConfig(Singleton):
|
||||
# Both verbose names are wrapped in ``_()`` like their help texts, so the
# field labels are translatable instead of being plain untranslated strings.
node_heartbeat_timeout = models.PositiveIntegerField(
    verbose_name=_(u'node heartbeat timeout (in seconds)'),
    help_text=_(u'After this amount of time a node without heartbeat updates is considered dead and removed from the cluster node list.'),
    default=DEFAULT_NODE_HEARTBEAT_TIMEOUT,
)
dead_node_removal_interval = models.PositiveIntegerField(
    verbose_name=_(u'dead node check and removal interval (in seconds)'),
    help_text=_(u'Interval of time to check the cluster for unresponsive nodes and remove them from the cluster.'),
    default=DEFAULT_DEAD_NODE_REMOVAL_INTERVAL,
)
|
||||
|
||||
objects = ClusteringConfigManager()
|
||||
cluster = ClusteringConfigManager()
|
||||
|
||||
def __unicode__(self):
|
||||
return ugettext('clustering config')
|
||||
|
||||
3
apps/clustering/signals.py
Normal file
3
apps/clustering/signals.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from django.dispatch import Signal
|
||||
|
||||
# Signal for a node being marked as dead; declares a single ``node`` argument.
node_died = Signal(providing_args=['node'])
|
||||
@@ -15,11 +15,11 @@ logger = logging.getLogger(__name__)
|
||||
def node_heartbeat():
|
||||
logger.debug('starting')
|
||||
node = Node.objects.myself()
|
||||
node.save()
|
||||
node.send_heartbeat()
|
||||
|
||||
|
||||
@simple_locking('house_keeping', 10)
|
||||
def house_keeping():
|
||||
logger.debug('starting')
|
||||
ClusteringConfig.objects.check_dead_nodes()
|
||||
ClusteringConfig.cluster.check_dead_nodes()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user