#!/bin/ksh
#
# Servercheck - that's all it does -
# Script to check the server health for various processes
#
# Written from scratch by Gregg Lain 10/13/2007 gregg@mochabomb.com
#
# Meant to be run from cron (not in a loop).  For every "service: NAME" entry
# in the configuration file it asks /sbin/service for the status of NAME.
# When the service is not reported as running it mails an alert, attempts a
# restart and mails the outcome.  Every notification is also appended to the
# log file.
#
# Configuration file (/usr/local/servercheck.conf), one entry per line:
#   email: ADDRESS     recipient of the alerts
#   service: NAME      service to check (repeat the line for each service)
# Lines that do not start with one of these keys (e.g. '#' comments) are
# ignored.  A "pager:" entry may be present but nothing in this script uses it.
#
###############################################################################
#
# Variables
#
conf="/usr/local/servercheck.conf"
logdir="/var/log"
logfile="$logdir/servercheck"
timestamp=$(date '+%m-%d-%y-%H:%M:%S')
Server=$(hostname)
email=""   # filled in by main from the configuration file

###############################################################################
#
# Helpers
#

# conf_values KEY [FILE]
# Prints the value of every "KEY: value" line of FILE (default: $conf), one
# per line.  The key must be the first thing on the line, so commented-out
# entries and lines that merely mention the key are skipped.
conf_values() {
  sed -n "s/^[[:space:]]*$1:[[:space:]]*//p" "${2:-$conf}"
}

# status_is_healthy TEXT
# Returns 0 when TEXT (the output of "service NAME status") reports a running
# service, 1 otherwise.  "not running" is tested first because it also
# contains the word "running".
status_is_healthy() {
  case $1 in
    *"not running"*) return 1 ;;
    *running* | *OK*) return 0 ;;
    *) return 1 ;;
  esac
}

# notify SUBJECT LINE...
# Appends each LINE to the log file and mails the same lines to $email under
# SUBJECT.  When no recipient is configured the message is only logged.
notify() {
  subject=$1
  shift
  printf '%s\n' "$@" >> "$logfile"
  if [ -n "$email" ]; then
    # $email is deliberately unquoted so several addresses stay separate words.
    # shellcheck disable=SC2086
    printf '%s\n' "$@" | mail -s "$subject" $email
  fi
}

# check_service NAME
# Checks one service; if it is not running, reports it, tries to restart it
# and reports whether the restart worked.
check_service() {
  procstatus=$1
  svc_output=$(/sbin/service "$procstatus" status)
  if status_is_healthy "$svc_output"; then
    return 0
  fi

  # something is not running
  notify "$Server: $procstatus not running" \
    "(servercheck) $Server: $procstatus not running: $timestamp "

  if /sbin/service "$procstatus" restart; then
    # something was restarted; report the first matching process from ps
    servicePID=$(ps -ef | grep -- "$procstatus" | grep -v grep |
      awk 'NR == 1 { print $2 }')
    notify "$Server $procstatus restarted successfully" \
      "$Server $procstatus re-started" \
      "$procstatus running with PID of $servicePID"
  else
    # something cannot be started
    notify "** Alert ** $Server: $procstatus cannot be started" \
      "Alert! " \
      "$Server $procstatus cannot be started" \
      "Restart $procstatus via shell or webmin"
  fi
}

###############################################################################
#
# Main: read the configuration, then check each configured service once
#
main() {
  if [ ! -r "$conf" ]; then
    echo "servercheck: cannot read $conf" >&2
    return 1
  fi

  email=$(conf_values email)
  if [ -z "$email" ]; then
    echo "servercheck: no 'email:' entry in $conf; alerts are only logged" >&2
  fi

  # Service names are single words, so splitting the list on whitespace is
  # intended here.
  # shellcheck disable=SC2046
  for procstatus in $(conf_values service); do
    check_service "$procstatus"
  done
  return 0
}

main "$@"