From c686ac72e3f81c94dcbc1c1996b83fe33161b7a4 Mon Sep 17 00:00:00 2001 From: James Meyer Date: Mon, 25 Nov 2013 20:58:51 -0600 Subject: xymon-server: hook up xymon alerts to the osd. System will push out alerts via func to all hosts for any red conditions excluding the ping test. Test must be red for 4hours with repeat notifications every 12 hours refs #943 --- abs/core/xymon/alerts.cfg | 128 +++++++++++++++++++++++++++++++++++++++- abs/core/xymon/hobbit_notify.sh | 67 +++++++++++++++------ 2 files changed, 174 insertions(+), 21 deletions(-) mode change 100644 => 100755 abs/core/xymon/alerts.cfg diff --git a/abs/core/xymon/alerts.cfg b/abs/core/xymon/alerts.cfg old mode 100644 new mode 100755 index e458655..4f97f18 --- a/abs/core/xymon/alerts.cfg +++ b/abs/core/xymon/alerts.cfg @@ -1,6 +1,128 @@ +# +# The alerts.cfg file controls who receives alerts +# when a status in the XYmon system goes into a critical +# state (usually: red, yellow or purple). +# +# This file is made up from RULES and RECIPIENTS. +# +# A RULE is a filter made from the PAGE where a host +# is located in XYmon; the HOST name, the SERVICE name, +# the COLOR of the status, the TIME of day, and the +# DURATION of the event. +# +# A RECIPIENT can be a MAIL address, or a SCRIPT. +# +# Recipients can also have rules associated with them, +# that modify the rules for a single recipient, e.g. +# you can define a rule for alerting, then add an +# extra criteria e.g. so a single recipient does not get +# alerted until after 20 minutes. +# +# A sample rule: +# +# HOST=www.foo.com SERVICE=http +# MAIL webadmin@foo.com REPEAT=20 RECOVERED +# MAIL cio@foo.com DURATION>60 COLOR=red +# SCRIPT /usr/local/bin/sendsms 1234567890 FORMAT=SMS +# +# The first line sets up a rule that catches alerts +# for the host "www.foo.com" and the "http" service. 
+# There are three recipients for these alerts: The first +# one is the "webadmin@foo.com" - they get alerted +# immediately when the status goes into an alert state, +# and the alert is repeated every 20 minutes until it +# recovers. When it recovers, a message is sent about +# the recovery. +# +# The second recipient is "cio@foo.com". He gets alerted +# only when the service goes "red" for more than 60 minutes. +# +# The third recipient is a script, "/usr/local/bin/sendsms". +# The real recipient is "1234567890", but it is handled +# by the script - the script receives a set of environment +# variables with the details about the alert, including the +# real recipient. The alert message is preformatted for +# an SMS recipient. +# +# You can use Perl-compatible "regular expressions" for +# the PAGE, HOST and SERVICE definitions, by putting a "%" +# in front of the regex. E.g. +# +# HOST=%^www.* +# MAIL webadmin@foo.com EXHOST=www.testsite.foo.com +# +# This sets up a rule so that alerts from any hostname +# beginning with "www" go to "webadmin@foo.com", EXCEPT +# alerts from "www.testsite.foo.com" +# +# The following keywords are recognized: +# PAGE - rule matching an alert by the name of the +# page in XYmon. This is the name following +# the "page", "subpage" or "subparent" keyword +# in the hosts.cfg file. +# EXPAGE - rule excluding an alert if the pagename matches. +# HOST - rule matching an alert by the hostname. +# EXHOST - rule excluding an alert by matching the hostname. +# SERVICE - rule matching an alert by the service name. +# EXSERVICE - rule excluding an alert by matching the service name. +# GROUP - rule matching an alert by the group ID. +# (Group ID's are associated with a status through the +# analysis.cfg configuration). +# EXGROUP - rule excluding an alert by matching the group ID. +# COLOR - rule matching an alert by color. Can be "red", +# "yellow", or "purple". +# TIME - rule matching an alert by the time-of-day.
This +# is specified as the DOWNTIME time specification +# in the hosts.cfg file (see hosts.cfg(5)). +# DURATION - Rule matching an alert if the event has lasted +# longer/shorter than the given duration. E.g. +# DURATION>10 (lasted longer than 10 minutes) or +# DURATION<30 (only sends alerts the first 30 minutes). +# RECOVERED - Rule matches if the alert has recovered from an +# alert state. +# NOTICE - Rule matches if the message is a "notify" message +# (typically sent when a status is enabled or disabled). +# MAIL - Recipient who receives an e-mail alert. This takes +# one parameter, the e-mail address. +# SCRIPT - Recipient that invokes a script. This takes two +# parameters: The script filename, and the recipient +# that gets passed to the script. +# FORMAT - format of the text message with the alert. Default +# is "TEXT" (suitable for e-mail alerts). "SMS" is +# a short message with no subject for SMS alerts. +# "SCRIPT" is a brief message template for scripts. +# REPEAT - How often an alert gets repeated, in minutes. +# STOP - Valid for a recipient: If this recipient gets an +# alert, recipients further down in alerts.cfg +# are ignored. +# UNMATCHED - Matches if no alerts have been sent so far. +# +# +# Scripts get the following environment variables pre-defined so +# that they can send a meaningful alert: +# +# BBCOLORLEVEL - The color of the alert: "red", "yellow" or "purple" +# BBALPHAMSG - The full text of the status log triggering the alert +# ACKCODE - The "cookie" that can be used to acknowledge the alert +# RCPT - The recipient, from the SCRIPT entry +# BBHOSTNAME - The name of the host that the alert is about +# MACHIP - The IP-address of the host that has a problem +# BBSVCNAME - The name of the service that the alert is about +# BBSVCNUM - The numeric code for the service. From SVCCODES definition. +# BBHOSTSVC - HOSTNAME.SERVICE that the alert is about.
+# BBHOSTSVCCOMMAS - As BBHOSTSVC, but dots in the hostname replaced with commas +# BBNUMERIC - A 22-digit number made by BBSVCNUM, MACHIP and ACKCODE. +# RECOVERED - Is "1" if the service has recovered. +# DOWNSECS - Number of seconds the service has been down. +# DOWNSECSMSG - When recovered, holds the text "Event duration : N" where +# N is the DOWNSECS value. -HOST=* SERVICE=disk - SCRIPT /home/xymon/server/bin/hobbit_notify.sh 1234567890 FORMAT=SMS REPEAT=10h COLOR=yellow - SCRIPT /home/xymon/server/bin/hobbit_notify.sh 1234567890 FORMAT=SMS REPEAT=1h COLOR=red + +HOST=* SERVICE=func + SCRIPT /home/xymon/server/bin/hobbit_notify.sh 1234567890 FORMAT=SMS DURATION=4h REPEAT=48h COLOR=red + +HOST=* SERVICE=* EXSERVICE=conn + SCRIPT /home/xymon/server/bin/hobbit_notify.sh 1234567890 FORMAT=SMS DURATION=4h REPEAT=12h COLOR=red +# SCRIPT /home/xymon/server/bin/hobbit_notify.sh 1234567890 FORMAT=SMS DURATION=4H REPEAT=12H COLOR=yellow diff --git a/abs/core/xymon/hobbit_notify.sh b/abs/core/xymon/hobbit_notify.sh index 7569e2a..742b3db 100644 --- a/abs/core/xymon/hobbit_notify.sh +++ b/abs/core/xymon/hobbit_notify.sh @@ -1,4 +1,5 @@ #!/bin/bash +. /etc/systemconfig # # BBCOLORLEVEL - The color of the alert: "red", "yellow" or "purple" # BBALPHAMSG - The full text of the status log triggering the alert @@ -14,35 +15,65 @@ # RECOVERED - Is "1" if the service has recovered. # DOWNSECS - Number of seconds the service has been down. 
# DOWNSECSMSG - When recovered, holds the text "Event duration : N" where -echo $BBCOLORLEVEL > /tmp/hobbitout -echo $BBALPHAMSG >>/tmp/hobbitout -echo $RCPT >>/tmp/hobbitout -echo $BBHOSTNAME >>/tmp/hobbitout -echo $BBSVCNAME >>/tmp/hobbitout -echo $BBHOSTSVC >>/tmp/hobbitout + + +# echo $BBCOLORLEVEL > /tmp/hobbitout +# echo $BBALPHAMSG >>/tmp/hobbitout +# echo $RCPT >>/tmp/hobbitout +# echo $BBHOSTNAME >>/tmp/hobbitout +# echo $BBSVCNAME >>/tmp/hobbitout +# echo $BBHOSTSVC >>/tmp/hobbitout #BBCOLORLEVEL=red #BBCOLORLEVEL=yellow #BBALPHAMSG="vmtest:disk red [751147]" -#BBHOSTNAME="vmtest" +#BBHOSTNAME="testcraa" #BBSVCNAME="disk" +#BBSVCNAME="func" #BBHOSTSVC="vmtest.disk" -if [ x$BBSVCNAME = xdisk ] -then - case $BBCOLORLEVEL in - red ) - /usr/bin/notify.py ALT "$BBHOSTNAME disk is full" - ;; - yellow ) +#/usr/LH/bin/msg_client.py --msg "$BBCOLORLEVEL\n$BBHOSTSVC" +#/usr/bin/func "*" call msg display + +function msg_func_out() { + /usr/bin/func "*" call msg display "${1}" +} + +function msg_local_out() { + + /usr/LH/bin/msg_client.py --msg "${1}" + + } + - /usr/bin/notify.py PHN "$BBHOSTNAME disk almost full " - ;; - esac -fi +case x$BBSVCNAME in + xdisk) + case $BBCOLORLEVEL in + red ) + msg_func_out "$BBHOSTNAME disk is at 95% full or greater|alert" + ;; + yellow ) + msg_func_out "$BBHOSTNAME disk is almost full" + ;; + esac + ;; + + xfunc) + if [ "x$hostname" = "x$BBHOSTNAME" ] + then + msg_local_out "System can not communicate with itself (func) |alert" + else + msg_func_out "$hostname can not communicate with $BBHOSTNAME (func)|alert" + fi + ;; + *) + #catch all for everything else + msg_func_out "Condition $BBCOLORLEVEL\n$BBHOSTNAME $BBSVCNAME\n Please check the system health webpage|alert" + ;; +esac -- cgit v0.12 From 653c9fdd68704738da04d233dd7bdc8f25e0eaf8 Mon Sep 17 00:00:00 2001 From: James Meyer Date: Tue, 26 Nov 2013 15:53:44 -0600 Subject: xymon-server: ignore /cdrom /mnt /media for disk checks. 
Modify hbnotes.py so that it adds hosts to analysis.cfg. This is needed to define the proper checks per host type. refs #943 --- abs/core/xymon/PKGBUILD | 12 ++-- abs/core/xymon/analysis.cfg | 145 +++++++++++++++++++++++++------------------- abs/core/xymon/hbnotes.py | 130 ++++++++++++++++++++------------------- 3 files changed, 155 insertions(+), 132 deletions(-) diff --git a/abs/core/xymon/PKGBUILD b/abs/core/xymon/PKGBUILD index 0c797ee..a0c603b 100755 --- a/abs/core/xymon/PKGBUILD +++ b/abs/core/xymon/PKGBUILD @@ -1,7 +1,7 @@ pkgbase=xymon pkgname=(xymonserver xymonclient) pkgver=4.3.5 -pkgrel=49 +pkgrel=50 pkgdesc="Hobbit is a system for monitoring servers and networks. " license="GPL" arch=('i686' 'x86_64') @@ -128,17 +128,17 @@ md5sums=('31923ec126fe1c264fceb459d2175161' '758cdd9f1ba5ae3e1b42e811172806f3' 'ee608dcafc050d790e28647a670f6b60' 'b2f98ac0df013332deedc1efae0a270d' - 'c31bcb503a227ed5ff7b218581793bfc' + '1141fc6f846e91f380bbcdb212b44f7d' '0f70e76a164f648f0a4a01110137cb20' - 'f6921f0413215ae174e81306be80d37f' + 'b0664f3b38717dce911f59bcbd84e7a4' '80d9cfac86c6d96836e6f406e35e7cf5' 'd210c43fb9ee9ad6cd7648e0c2e0efea' '0c808fa12672289f86b0651545381308' '0469d775db9fdd18ea95dd41937ada82' '0757294eec13771f8e63da23cf066796' - '622f400fd098cbc43c203e3210a6694e' - '84442377c6e914b65519076bda4e17a4' - 'd7116dc59319b893e90a26e6beb83994' + '6baa410da1dfb86435191f4805186ea7' + '81715c58ae495812da0e0f18e2f74c76' + '77a542c2fd13468791ef23057ba8e77d' 'b4e8641e97e6b689dbc634af785e6799' 'e2844513e2c92e8b5084818f3b2a478d' '98e9242ae346f729b14cb195786571f2' diff --git a/abs/core/xymon/analysis.cfg b/abs/core/xymon/analysis.cfg index 0de317b..da833e1 100644 --- a/abs/core/xymon/analysis.cfg +++ b/abs/core/xymon/analysis.cfg @@ -348,81 +348,98 @@ HOST=_MASTERBACKEND_ - PROC sshd 1 - PROC lighttpd - PROC hobbitd - PROC crond - PROC mysql - PROC mythbackend - DISK * 95 99 - PORT "LOCAL=%([.:]80)$" state=LISTEN TEXT=http - PORT "LOCAL=%([.:]1337)$" state=LISTEN TEXT=cgi - 
PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh + PROC sshd 1 + PROC lighttpd + PROC xymond + PROC crond + PROC mysql + PROC mythbackend + PROC msg_daemon.py + DISK %^/cdrom.* IGNORE + DISK %^/mnt.* IGNORE + DISK %^/media.* IGNORE + DISK * 95 99 + PORT "LOCAL=%([.:]80)$" state=LISTEN TEXT=http + PORT "LOCAL=%([.:]1337)$" state=LISTEN TEXT=cgi + PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh HOST=_MASTERFRONTEND_ - PROC mythfrontend - PROC sshd 1 - PROC lighttpd - PROC hobbitd - PROC crond - PROC mysql - PROC mythbackend - DISK * 95 99 - PORT "LOCAL=%([.:]80)$" state=LISTEN TEXT=http - PORT "LOCAL=%([.:]1337)$" state=LISTEN TEXT=cgi - PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh - PORT "LOCAL=%([.:]5000)$" state=LISTEN TEXT=ghosd - PORT "LOCAL=%([.:]5001)$" state=LISTEN TEXT=ghosd + PROC mythfrontend + PROC sshd 1 + PROC lighttpd + PROC xymond + PROC crond + PROC mysql + PROC mythbackend + PROC msg_daemon.py + DISK %^/cdrom.* IGNORE + DISK %^/mnt.* IGNORE + DISK %^/media.* IGNORE + DISK * 95 99 + PORT "LOCAL=%([.:]80)$" state=LISTEN TEXT=http + PORT "LOCAL=%([.:]1337)$" state=LISTEN TEXT=cgi + PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh + HOST=_FRONTEND_ - PROC sshd 1 - PROC crond - PROC mythfrontend - PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh - PORT "LOCAL=%([.:]5000)$" state=LISTEN TEXT=ghosd - PORT "LOCAL=%([.:]5001)$" state=LISTEN TEXT=ghosd + PROC sshd 1 + PROC crond + PROC mythfrontend + PROC msg_daemon.py + PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh HOST=_SLAVEBACKEND_ - PROC sshd 1 - PROC crond - PROC mythbackend - DISK * 95 99 - PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh + PROC sshd 1 + PROC crond + PROC mythbackend + DISK %^/cdrom.* IGNORE + DISK %^/mnt.* IGNORE + DISK %^/media.* IGNORE + DISK * 95 99 + PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh HOST=_SLAVEFRONTEND_ - PROC sshd 1 - PROC lighttpd - PROC hobbitd - PROC crond - PROC mythbackend - PROC mythfrontend - DISK * 95 99 - PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh - PORT 
"LOCAL=%([.:]5000)$" state=LISTEN TEXT=ghosd - PORT "LOCAL=%([.:]5001)$" state=LISTEN TEXT=ghosd + PROC sshd 1 + PROC lighttpd + PROC hobbitd + PROC crond + PROC mythbackend + PROC mythfrontend + PROC msg_daemon.py + DISK %^/cdrom.* IGNORE + DISK %^/mnt.* IGNORE + DISK %^/media.* IGNORE + DISK * 95 99 + PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh + HOST=_STANDALONE_ - PROC sshd 1 - PROC lighttpd - PROC hobbitd - PROC crond - PROC mysql - PROC mythbackend - DISK * 95 99 - PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh - PORT "LOCAL=%([.:]5000)$" state=LISTEN TEXT=ghosd - PORT "LOCAL=%([.:]5001)$" state=LISTEN TEXT=ghosd - PORT "LOCAL=%([.:]80)$" state=LISTEN TEXT=http - PORT "LOCAL=%([.:]1337)$" state=LISTEN TEXT=cgi - PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh + PROC mythfrontend + PROC sshd 1 + PROC lighttpd + PROC xymond + PROC crond + PROC mysql + PROC mythbackend + PROC msg_daemon.py + DISK %^/cdrom.* IGNORE + DISK %^/mnt.* IGNORE + DISK %^/media.* IGNORE + DISK * 95 99 + PORT "LOCAL=%([.:]80)$" state=LISTEN TEXT=http + PORT "LOCAL=%([.:]1337)$" state=LISTEN TEXT=cgi + PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh DEFAULT - # These are the built-in defaults. - UP 1h - LOAD 5.0 10.0 - DISK * 99 100 - MEMPHYS 100 101 - MEMSWAP 50 80 - MEMACT 90 97 + # These are the built-in defaults. 
+ UP 1h + LOAD 5.0 10.0 + DISK %^/cdrom.* IGNORE + DISK %^/mnt.* IGNORE + DISK %^/media.* IGNORE + DISK * 99 100 + MEMPHYS 100 101 + MEMSWAP 50 80 + MEMACT 90 97 diff --git a/abs/core/xymon/hbnotes.py b/abs/core/xymon/hbnotes.py index d643044..b5ad72a 100644 --- a/abs/core/xymon/hbnotes.py +++ b/abs/core/xymon/hbnotes.py @@ -28,7 +28,7 @@ def create_dot_file(): result = cursor.fetchall() frontonly="_FRONTEND_" masterbackend="_MASTERBACKEND_" - masterbacked_w_fe="_MASTERFE_" + masterbacked_w_fe="_MASTERFRONTEND_" slave_w_fe="_SLAVEFE_" slave="_SLAVEBACKEND_" stand_alone="_STANDALONE_" @@ -166,7 +166,8 @@ def create_dot_file(): # cmd="/usr/bin/circo -Tpng -o/data/srv/httpd/htdocs/xymon/gifs/network.png " + dotfilename results=os.popen(cmd,'r') - + return frontonly,masterbackend,masterbacked_w_fe,slave_w_fe,slave,stand_alone + @@ -458,68 +459,73 @@ function Do_it () file.write(string) -def main(argv): - create_dot_file() - create_notes() - +def create_hobbit_analysis(frontonly, + masterbackend, + masterbacked_w_fe, + slave_w_fe, + slave, + stand_alone): -#------------------------------write client-config------------------------------------------------ - - - #-------------------------------------------------- - #hobbitfile="/home/xymon/server/etc/hobbit-clients.cfg" - - ###WHY WAS THIS HERE?? 
- #hobbitfile="/home/xymon/server/etc/tasks.cfg" - #hobbitf=open(hobbitfile,'r') - #hobbitconfig=hobbitf.readlines() - #hobbitf.close() - #hobbitf=open(hobbitfile,'w') - #print "FE",frontonly - #print "MBE",masterbackend - #print "MBEwFW",masterbacked_w_fe - #print "SLAVEwFW",slave_w_fe - #print "SLAVE",slave - #print "STD",stand_alone - - #for line in hobbitconfig: - #if line.startswith("HOST"): - #cline=line.split("=") - #if cline[1].startswith("_FRONTEND_"): - #pline="HOST=" - #pline+=frontonly.strip() - #hobbitf.write(pline + "\n") - #elif cline[1].startswith("_MASTERBACKEND_"): - #pline="HOST=" - #pline+=masterbackend.strip() - ##print pline - #hobbitf.write(pline+ "\n") - #elif cline[1].startswith("_MASTERFRONTEND_"): - #pline="HOST=" - #pline+=masterbacked_w_fe.strip() - ##print pline - #hobbitf.write(pline+ "\n") - #elif cline[1].startswith("_SLAVEBACKEND_"): - #pline="HOST=" - #pline+=slave.strip() - ##print pline - #hobbitf.write(pline+ "\n") - #elif cline[1].startswith("_SLAVEFE_"): - #pline="HOST=" - #pline+=slave_w_fe.strip() - ##print pline - #hobbitf.write(pline+ "\n") - #else: - ##print line - #hobbitf.write(line) - #else: - ##print line - #hobbitf.write(line) - -#-----------------------------------------------------------END CLIENT CONFIG - - # execute SQL statement + hobbitfile="/home/xymon/server/etc/analysis.cfg" + hobbitf=open(hobbitfile,'r') + hobbitconfig=hobbitf.readlines() + hobbitf.close() + try: + hobbitf=open(hobbitfile,'w') + print "FE",frontonly + print "MBE",masterbackend + print "MBEwFW",masterbacked_w_fe + print "SLAVEwFW",slave_w_fe + print "SLAVE",slave + print "STD",stand_alone + + for line in hobbitconfig: + if line.startswith("HOST"): + cline=line.split("=") + if cline[1].startswith("_FRONTEND_"): + pline="HOST=" + pline+=frontonly.strip() + hobbitf.write(pline + "\n") + elif cline[1].startswith("_MASTERBACKEND_"): + pline="HOST=" + pline+=masterbackend.strip() + #print pline + hobbitf.write(pline+ "\n") + elif 
cline[1].startswith("_MASTERFRONTEND_"): + pline="HOST=" + pline+=masterbacked_w_fe.strip() + #print pline + hobbitf.write(pline+ "\n") + elif cline[1].startswith("_SLAVEBACKEND_"): + pline="HOST=" + pline+=slave.strip() + #print pline + hobbitf.write(pline+ "\n") + elif cline[1].startswith("_SLAVEFE_"): + pline="HOST=" + pline+=slave_w_fe.strip() + #print pline + hobbitf.write(pline+ "\n") + else: + #print line + hobbitf.write(line) + else: + #print line + hobbitf.write(line) + hobbitf.close() + except: + print "problem writing analysis.cfg" +def main(argv): + frontonly,masterbackend,masterbacked_w_fe,slave_w_fe,slave,stand_alone = create_dot_file() + create_notes() + create_hobbit_analysis(frontonly, + masterbackend, + masterbacked_w_fe, + slave_w_fe, + slave, + stand_alone) + if __name__ == "__main__": main(sys.argv[1:]) -- cgit v0.12