diff options
-rwxr-xr-x[-rw-r--r--] | abs/core/xymon/alerts.cfg | 128 | ||||
-rw-r--r-- | abs/core/xymon/hobbit_notify.sh | 67 |
2 files changed, 174 insertions, 21 deletions
diff --git a/abs/core/xymon/alerts.cfg b/abs/core/xymon/alerts.cfg index e458655..4f97f18 100644..100755 --- a/abs/core/xymon/alerts.cfg +++ b/abs/core/xymon/alerts.cfg @@ -1,6 +1,128 @@ +# +# The alerts.cfg file controls who receives alerts +# when a status in the XYmon system goes into a critical +# state (usually: red, yellow or purple). +# +# This file is made up from RULES and RECIPIENTS. +# +# A RULE is a filter made from the PAGE where a host +# is located in XYmon; the HOST name, the SERVICE name, +# the COLOR of the status, the TIME of day, and the +# DURATION of the event. +# +# A RECIPIENT can be a MAIL address, or a SCRIPT. +# +# Recipients can also have rules associated with them, +# that modify the rules for a single recipient, e.g. +# you can define a rule for alerting, then add an +# extra criteria e.g. so a single recipient does not get +# alerted until after 20 minutes. +# +# A sample rule: +# +# HOST=www.foo.com SERVICE=http +# MAIL webadmin@foo.com REPEAT=20 RECOVERED +# MAIL cio@foo.com DURATION>60 COLOR=red +# SCRIPT /usr/local/bin/sendsms 1234567890 FORMAT=SMS +# +# The first line sets up a rule that catches alerts +# for the host "www.foo.com" and the "http" service. +# There are three recipients for these alerts: The first +# one is the "webadmin@foo.com" - they get alerted +# immediately when the status goes into an alert state, +# and the alert is repeated every 20 minutes until it +# recovers. When it recovers, a message is sent about +# the recovery. +# +# The second recipient is "cio@foo.com". He gets alerted +# only when the service goes "red" for more than 60 minutes. +# +# The third recipient is a script, "/usr/local/bin/sendsms". +# The real recipient is "1234567890", but it is handled +# by the script - the script receives a set of environment +# variables with the details about the alert, including the +# real recipient. The alert message is preformatted for +# an SMS recipient. +# +# You can use Perl-compatible "regular expressions" for +# the PAGE, HOST and SERVICE definitions, by putting a "%" +# in front of the regex. E.g. +# +# HOST=%^www.* +# MAIL webadmin@foo.com EXHOST=www.testsite.foo.com +# +# This sets up a rule so that alerts from any hostname +# beginning with "www" goes to "webadmin@foo.com", EXCEPT +# alerts from "www.testsite.foo.com" +# +# The following keywords are recognized: +# PAGE - rule matching an alert by the name of the +# page in XYmon. This is the name following +# the "page", "subpage" or "subparent" keyword +# in the hosts.cfg file. +# EXPAGE - rule excluding an alert if the pagename matches. +# HOST - rule matching an alert by the hostname. +# EXHOST - rule excluding an alert by matching the hostname. +# SERVICE - rule matching an alert by the service name. +# EXSERVICE - rule excluding an alert by matching the hostname. +# GROUP - rule matching an alert by the group ID. +# (Group ID's are associated with a status through the +# analysis.cfg configuration). +# EXGROUP - rule excluding an alert by matching the group ID. +# COLOR - rule matching an alert by color. Can be "red", +# "yellow", or "purple". +# TIME - rule matching an alert by the time-of-day. This +# is specified as the DOWNTIME timespecification +# in the hosts.cfg file (see hosts.cfg(5)). +# DURATION - Rule matcing an alert if the event has lasted +# longer/shorter than the given duration. E.g. +# DURATION>10 (lasted longer than 10 minutes) or +# DURARION<30 (only sends alerts the first 30 minutes). +# RECOVERED - Rule matches if the alert has recovered from an +# alert state. +# NOTICE - Rule matches if the message is a "notify" message +# (typically sent when a status is enabled or disabled). +# MAIL - Recipient who receives an e-mail alert. This takes +# one parameter, the e-mail address. +# SCRIPT - Recipient that invokes a script. This takes two +# parameters: The script filename, and the recipient +# that gets passed to the script. +# FORMAT - format of the text message with the alert. Default +# is "TEXT" (suitable for e-mail alerts). "SMS" is +# a short message with no subject for SMS alerts. +# "SCRIPT" is a brief message template for scripts. +# REPEAT - How often an alert gets repeated, in minutes. +# STOP - Valid for a recipient: If this recipient gets an +# alert, recipients further down in alerts.cfg +# are ignored. +# UNMATCHED - Matches if no alerts have been sent so far. +# +# +# Script get the following environment variables pre-defined so +# that they can send a meaningful alert: +# +# BBCOLORLEVEL - The color of the alert: "red", "yellow" or "purple" +# BBALPHAMSG - The full text of the status log triggering the alert +# ACKCODE - The "cookie" that can be used to acknowledge the alert +# RCPT - The recipient, from the SCRIPT entry +# BBHOSTNAME - The name of the host that the alert is about +# MACHIP - The IP-address of the host that has a problem +# BBSVCNAME - The name of the service that the alert is about +# BBSVCNUM - The numeric code for the service. From SVCCODES definition. +# BBHOSTSVC - HOSTNAME.SERVICE that the alert is about. +# BBHOSTSVCCOMMAS - As BBHOSTSVC, but dots in the hostname replaced with commas +# BBNUMERIC - A 22-digit number made by BBSVCNUM, MACHIP and ACKCODE. +# RECOVERED - Is "1" if the service has recovered. +# DOWNSECS - Number of seconds the service has been down. +# DOWNSECSMSG - When recovered, holds the text "Event duration : N" where +# N is the DOWNSECS value. -HOST=* SERVICE=disk - SCRIPT /home/xymon/server/bin/hobbit_notify.sh 1234567890 FORMAT=SMS REPEAT=10h COLOR=yellow - SCRIPT /home/xymon/server/bin/hobbit_notify.sh 1234567890 FORMAT=SMS REPEAT=1h COLOR=red + +HOST=* SERVICE=func + SCRIPT /home/xymon/server/bin/hobbit_notify.sh 1234567890 FORMAT=SMS DURATION=4h REPEAT=48h COLOR=red + +HOST=* SERVICE=* EXSERVICE=conn + SCRIPT /home/xymon/server/bin/hobbit_notify.sh 1234567890 FORMAT=SMS DURATION=4h REPEAT=12h COLOR=red +# SCRIPT /home/xymon/server/bin/hobbit_notify.sh 1234567890 FORMAT=SMS DURATION=4H REPEAT=12H COLOR=yellow diff --git a/abs/core/xymon/hobbit_notify.sh b/abs/core/xymon/hobbit_notify.sh index 7569e2a..742b3db 100644 --- a/abs/core/xymon/hobbit_notify.sh +++ b/abs/core/xymon/hobbit_notify.sh @@ -1,4 +1,5 @@ #!/bin/bash +. /etc/systemconfig # # BBCOLORLEVEL - The color of the alert: "red", "yellow" or "purple" # BBALPHAMSG - The full text of the status log triggering the alert @@ -14,35 +15,65 @@ # RECOVERED - Is "1" if the service has recovered. # DOWNSECS - Number of seconds the service has been down. # DOWNSECSMSG - When recovered, holds the text "Event duration : N" where -echo $BBCOLORLEVEL > /tmp/hobbitout -echo $BBALPHAMSG >>/tmp/hobbitout -echo $RCPT >>/tmp/hobbitout -echo $BBHOSTNAME >>/tmp/hobbitout -echo $BBSVCNAME >>/tmp/hobbitout -echo $BBHOSTSVC >>/tmp/hobbitout + + +# echo $BBCOLORLEVEL > /tmp/hobbitout +# echo $BBALPHAMSG >>/tmp/hobbitout +# echo $RCPT >>/tmp/hobbitout +# echo $BBHOSTNAME >>/tmp/hobbitout +# echo $BBSVCNAME >>/tmp/hobbitout +# echo $BBHOSTSVC >>/tmp/hobbitout #BBCOLORLEVEL=red #BBCOLORLEVEL=yellow #BBALPHAMSG="vmtest:disk red [751147]" -#BBHOSTNAME="vmtest" +#BBHOSTNAME="testcraa" #BBSVCNAME="disk" +#BBSVCNAME="func" #BBHOSTSVC="vmtest.disk" -if [ x$BBSVCNAME = xdisk ] -then - case $BBCOLORLEVEL in - red ) - /usr/bin/notify.py ALT "$BBHOSTNAME disk is full" - ;; - yellow ) +#/usr/LH/bin/msg_client.py --msg "$BBCOLORLEVEL\n$BBHOSTSVC" +#/usr/bin/func "*" call msg display + +function msg_func_out() { + /usr/bin/func "*" call msg display "${1}" +} + +function msg_local_out() { + + /usr/LH/bin/msg_client.py --msg "${1}" + + } + - /usr/bin/notify.py PHN "$BBHOSTNAME disk almost full " - ;; - esac -fi +case x$BBSVCNAME in + xdisk) + case $BBCOLORLEVEL in + red ) + msg_func_out "$BBHOSTNAME disk is at 95% full or greater|alert" + ;; + yellow ) + msg_func_out "$BBHOSTNAME disk is almost full" + ;; + esac + ;; + + xfunc) + if [ "x$hostname" = "x$BBHOSTNAME" ] + then + msg_local_out "System can not communicate with itself (func) |alert" + else + msg_func_out "$hostname can not communicate with $BBHOSTNAME (func)|alert" + fi + ;; + *) + #catch all for everything else + msg_func_out "Condition $BBCOLORLEVEL\n$BBHOSTNAME $BBSVCNAME\n Please check the system health webpage|alert" + ;; +esac |