summary refs log tree commit diff stats
path: root/abs/core/xymon
diff options
context:
space:
mode:
Diffstat (limited to 'abs/core/xymon')
-rwxr-xr-x [-rw-r--r--] abs/core/xymon/alerts.cfg 128
-rw-r--r-- abs/core/xymon/hobbit_notify.sh 67
2 files changed, 174 insertions, 21 deletions
diff --git a/abs/core/xymon/alerts.cfg b/abs/core/xymon/alerts.cfg
index e458655..4f97f18 100644..100755
--- a/abs/core/xymon/alerts.cfg
+++ b/abs/core/xymon/alerts.cfg
@@ -1,6 +1,128 @@
+#
+# The alerts.cfg file controls who receives alerts
+# when a status in the XYmon system goes into a critical
+# state (usually: red, yellow or purple).
+#
+# This file is made up from RULES and RECIPIENTS.
+#
+# A RULE is a filter made from the PAGE where a host
+# is located in XYmon; the HOST name, the SERVICE name,
+# the COLOR of the status, the TIME of day, and the
+# DURATION of the event.
+#
+# A RECIPIENT can be a MAIL address, or a SCRIPT.
+#
+# Recipients can also have rules associated with them,
+# that modify the rules for a single recipient, e.g.
+# you can define a rule for alerting, then add an
+# extra criterion so that a single recipient does not get
+# alerted until after 20 minutes.
+#
+# A sample rule:
+#
+# HOST=www.foo.com SERVICE=http
+# MAIL webadmin@foo.com REPEAT=20 RECOVERED
+# MAIL cio@foo.com DURATION>60 COLOR=red
+# SCRIPT /usr/local/bin/sendsms 1234567890 FORMAT=SMS
+#
+# The first line sets up a rule that catches alerts
+# for the host "www.foo.com" and the "http" service.
+# There are three recipients for these alerts: The first
+# one is the "webadmin@foo.com" - they get alerted
+# immediately when the status goes into an alert state,
+# and the alert is repeated every 20 minutes until it
+# recovers. When it recovers, a message is sent about
+# the recovery.
+#
+# The second recipient is "cio@foo.com". He gets alerted
+# only when the service goes "red" for more than 60 minutes.
+#
+# The third recipient is a script, "/usr/local/bin/sendsms".
+# The real recipient is "1234567890", but it is handled
+# by the script - the script receives a set of environment
+# variables with the details about the alert, including the
+# real recipient. The alert message is preformatted for
+# an SMS recipient.
+#
+# You can use Perl-compatible "regular expressions" for
+# the PAGE, HOST and SERVICE definitions, by putting a "%"
+# in front of the regex. E.g.
+#
+# HOST=%^www.*
+# MAIL webadmin@foo.com EXHOST=www.testsite.foo.com
+#
+# This sets up a rule so that alerts from any hostname
+# beginning with "www" go to "webadmin@foo.com", EXCEPT
+# alerts from "www.testsite.foo.com"
+#
+# The following keywords are recognized:
+# PAGE - rule matching an alert by the name of the
+# page in XYmon. This is the name following
+# the "page", "subpage" or "subparent" keyword
+# in the hosts.cfg file.
+# EXPAGE - rule excluding an alert if the pagename matches.
+# HOST - rule matching an alert by the hostname.
+# EXHOST - rule excluding an alert by matching the hostname.
+# SERVICE - rule matching an alert by the service name.
+# EXSERVICE - rule excluding an alert by matching the service name.
+# GROUP - rule matching an alert by the group ID.
+# (Group ID's are associated with a status through the
+# analysis.cfg configuration).
+# EXGROUP - rule excluding an alert by matching the group ID.
+# COLOR - rule matching an alert by color. Can be "red",
+# "yellow", or "purple".
+# TIME - rule matching an alert by the time-of-day. This
+# is specified as the DOWNTIME timespecification
+# in the hosts.cfg file (see hosts.cfg(5)).
+# DURATION - Rule matching an alert if the event has lasted
+# longer/shorter than the given duration. E.g.
+# DURATION>10 (lasted longer than 10 minutes) or
+# DURATION<30 (only sends alerts the first 30 minutes).
+# RECOVERED - Rule matches if the alert has recovered from an
+# alert state.
+# NOTICE - Rule matches if the message is a "notify" message
+# (typically sent when a status is enabled or disabled).
+# MAIL - Recipient who receives an e-mail alert. This takes
+# one parameter, the e-mail address.
+# SCRIPT - Recipient that invokes a script. This takes two
+# parameters: The script filename, and the recipient
+# that gets passed to the script.
+# FORMAT - format of the text message with the alert. Default
+# is "TEXT" (suitable for e-mail alerts). "SMS" is
+# a short message with no subject for SMS alerts.
+# "SCRIPT" is a brief message template for scripts.
+# REPEAT - How often an alert gets repeated, in minutes.
+# STOP - Valid for a recipient: If this recipient gets an
+# alert, recipients further down in alerts.cfg
+# are ignored.
+# UNMATCHED - Matches if no alerts have been sent so far.
+#
+#
+# Scripts get the following environment variables pre-defined so
+# that they can send a meaningful alert:
+#
+# BBCOLORLEVEL - The color of the alert: "red", "yellow" or "purple"
+# BBALPHAMSG - The full text of the status log triggering the alert
+# ACKCODE - The "cookie" that can be used to acknowledge the alert
+# RCPT - The recipient, from the SCRIPT entry
+# BBHOSTNAME - The name of the host that the alert is about
+# MACHIP - The IP-address of the host that has a problem
+# BBSVCNAME - The name of the service that the alert is about
+# BBSVCNUM - The numeric code for the service. From SVCCODES definition.
+# BBHOSTSVC - HOSTNAME.SERVICE that the alert is about.
+# BBHOSTSVCCOMMAS - As BBHOSTSVC, but dots in the hostname replaced with commas
+# BBNUMERIC - A 22-digit number made by BBSVCNUM, MACHIP and ACKCODE.
+# RECOVERED - Is "1" if the service has recovered.
+# DOWNSECS - Number of seconds the service has been down.
+# DOWNSECSMSG - When recovered, holds the text "Event duration : N" where
+# N is the DOWNSECS value.
-HOST=* SERVICE=disk
- SCRIPT /home/xymon/server/bin/hobbit_notify.sh 1234567890 FORMAT=SMS REPEAT=10h COLOR=yellow
- SCRIPT /home/xymon/server/bin/hobbit_notify.sh 1234567890 FORMAT=SMS REPEAT=1h COLOR=red
+
+# "func" alerts on any host: notify via hobbit_notify.sh only once the
+# status has been red for more than 4 hours, then repeat every 48 hours.
+# The duration criterion must be written DURATION> or DURATION< (see the
+# keyword list above); "DURATION=" is not a valid form.
+HOST=* SERVICE=func
+ SCRIPT /home/xymon/server/bin/hobbit_notify.sh 1234567890 FORMAT=SMS DURATION>4h REPEAT=48h COLOR=red
+
+# Every service except "conn", on any host: notify via hobbit_notify.sh
+# once the status has been red for more than 4 hours, repeating every
+# 12 hours. The duration criterion must be written DURATION> or
+# DURATION< (see the keyword list above); "DURATION=" is not a valid form.
+HOST=* SERVICE=* EXSERVICE=conn
+ SCRIPT /home/xymon/server/bin/hobbit_notify.sh 1234567890 FORMAT=SMS DURATION>4h REPEAT=12h COLOR=red
+# Disabled: the same notification for yellow statuses.
+# SCRIPT /home/xymon/server/bin/hobbit_notify.sh 1234567890 FORMAT=SMS DURATION>4h REPEAT=12h COLOR=yellow
diff --git a/abs/core/xymon/hobbit_notify.sh b/abs/core/xymon/hobbit_notify.sh
index 7569e2a..742b3db 100644
--- a/abs/core/xymon/hobbit_notify.sh
+++ b/abs/core/xymon/hobbit_notify.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+. /etc/systemconfig
#
# BBCOLORLEVEL - The color of the alert: "red", "yellow" or "purple"
# BBALPHAMSG - The full text of the status log triggering the alert
@@ -14,35 +15,65 @@
# RECOVERED - Is "1" if the service has recovered.
# DOWNSECS - Number of seconds the service has been down.
# DOWNSECSMSG - When recovered, holds the text "Event duration : N" where
-echo $BBCOLORLEVEL > /tmp/hobbitout
-echo $BBALPHAMSG >>/tmp/hobbitout
-echo $RCPT >>/tmp/hobbitout
-echo $BBHOSTNAME >>/tmp/hobbitout
-echo $BBSVCNAME >>/tmp/hobbitout
-echo $BBHOSTSVC >>/tmp/hobbitout
+
+
+# echo $BBCOLORLEVEL > /tmp/hobbitout
+# echo $BBALPHAMSG >>/tmp/hobbitout
+# echo $RCPT >>/tmp/hobbitout
+# echo $BBHOSTNAME >>/tmp/hobbitout
+# echo $BBSVCNAME >>/tmp/hobbitout
+# echo $BBHOSTSVC >>/tmp/hobbitout
#BBCOLORLEVEL=red
#BBCOLORLEVEL=yellow
#BBALPHAMSG="vmtest:disk red [751147]"
-#BBHOSTNAME="vmtest"
+#BBHOSTNAME="testcraa"
#BBSVCNAME="disk"
+#BBSVCNAME="func"
#BBHOSTSVC="vmtest.disk"
-if [ x$BBSVCNAME = xdisk ]
-then
- case $BBCOLORLEVEL in
- red )
- /usr/bin/notify.py ALT "$BBHOSTNAME disk is full"
- ;;
- yellow )
+#/usr/LH/bin/msg_client.py --msg "$BBCOLORLEVEL\n$BBHOSTSVC"
+#/usr/bin/func "*" call msg display
+
+# Send an alert text through `/usr/bin/func "*" call msg display`.
+# Arguments: $1 - the message text to display.
+# Returns:   the exit status of /usr/bin/func.
+# NOTE(review): the "*" target presumably addresses every func client -
+# confirm against the func setup.
+msg_func_out() {
+  local alert_text="${1}"
+  /usr/bin/func "*" call msg display "${alert_text}"
+}
+
+function msg_local_out() {
+
+ /usr/LH/bin/msg_client.py --msg "${1}"
+
+ }
+
- /usr/bin/notify.py PHN "$BBHOSTNAME disk almost full "
- ;;
- esac
-fi
+case x$BBSVCNAME in
+ xdisk)
+ case $BBCOLORLEVEL in
+ red )
+ msg_func_out "$BBHOSTNAME disk is at 95% full or greater|alert"
+ ;;
+ yellow )
+ msg_func_out "$BBHOSTNAME disk is almost full"
+ ;;
+ esac
+ ;;
+
+ xfunc)
+ if [ "x$hostname" = "x$BBHOSTNAME" ]
+ then
+ msg_local_out "System can not communicate with itself (func) |alert"
+ else
+ msg_func_out "$hostname can not communicate with $BBHOSTNAME (func)|alert"
+ fi
+ ;;
+ *)
+ #catch all for everything else
+ msg_func_out "Condition $BBCOLORLEVEL\n$BBHOSTNAME $BBSVCNAME\n Please check the system health webpage|alert"
+ ;;
+esac