diff options
Diffstat (limited to 'abs/core/xymon/analysis.cfg')
-rw-r--r-- | abs/core/xymon/analysis.cfg | 428 |
1 files changed, 428 insertions, 0 deletions
diff --git a/abs/core/xymon/analysis.cfg b/abs/core/xymon/analysis.cfg new file mode 100644 index 0000000..0de317b --- /dev/null +++ b/abs/core/xymon/analysis.cfg @@ -0,0 +1,428 @@ +# analysis.cfg - configuration file for clients reporting to Xymon +# +# This file is used by the xymond_client module, when it builds the +# cpu, disk, files, memory, msgs and procs status messages from the +# information reported by clients running on the monitored systems. +# +# This file must be installed on the Xymon server - client installations +# do not need this file. +# +# The file defines a series of rules: +# UP : Changes the "cpu" status when the system has rebooted recently, +# or when it has been running for too long. +# LOAD : Changes the "cpu" status according to the system load. +# CLOCK : Changes the "cpu" status if the client system clock is +# not synchronized with the clock of the Xymon server. +# DISK : Changes the "disk" status, depending on the amount of space +# used of filesystems. +# MEMPHYS: Changes the "memory" status, based on the percentage of real +# memory used. +# MEMACT : Changes the "memory" status, based on the percentage of "actual" +# memory used. Note: Not all systems report an "actual" value. +# MEMSWAP: Changes the "memory" status, based on the percentage of swap +# space used. +# PROC : Changes the "procs" status according to which processes were found +# in the "ps" listing from the client. +# LOG : Changes the "msgs" status according to entries in text-based logfiles. +# Note: The "client-local.cfg" file controls which logfiles the client will report. +# FILE : Changes the "files" status according to meta-data for files. +# Note: The "client-local.cfg" file controls which files the client will report. +# DIR : Changes the "files" status according to the size of a directory. +# Note: The "client-local.cfg" file controls which directories the client will report. +# PORT : Changes the "ports" status according to which tcp ports were found +# in the "netstat" listing from the client. +# DEFAULT: Set the default values that apply if no other rules match. +# +# All rules can be qualified so they apply only to certain hosts, or on certain +# times of the day (see below). +# +# Each type of rule takes a number of parameters: +# UP bootlimit toolonglimit +# The cpu status goes yellow if the system has been up for less than +# "bootlimit" time, or longer than "toolonglimit". The time is in +# minutes, or you can add h/d/w for hours/days/weeks - eg. "2h" for +# two hours, or "4w" for 4 weeks. +# Defaults: bootlimit=1h, toolonglimit=-1 (infinite). +# +# LOAD warnlevel paniclevel +# If the system load exceeds "warnlevel" or "paniclevel", the "cpu" +# status will go yellow or red, respectively. These are decimal +# numbers. +# Defaults: warnlevel=5.0, paniclevel=10.0 +# +# CLOCK maximum-offset +# If the system clock of the client differs from that of the Xymon +# server by more than "maximum-offset" seconds, then the CPU status +# column will go yellow. Note that the accuracy of this test is limited, +# since it is affected by the time it takes a client status report to +# go from the client to the Xymon server and be processed. You should +# therefore allow for a few seconds (5-10) of slack when you define +# your max. offset. +# It is not wise to use this test, unless your servers are synchronized +# to a common clock, e.g. through NTP. +# +# DISK filesystem warnlevel paniclevel +# DISK filesystem IGNORE +# If the utilization of "filesystem" is reported to exceed "warnlevel" +# or "paniclevel", the "disk" status will go yellow or red, respectively. +# "warnlevel" and "paniclevel" are either the percentage used, or the +# space available as reported by the local "df" command on the host. +# For the latter type of check, the "warnlevel" must be followed by the +# letter "U", e.g. "1024U". +# The special keyword "IGNORE" causes this filesystem to be ignored +# completely, i.e. it will not appear in the "disk" status column and +# it will not be tracked in a graph. This is useful for e.g. removable +# devices, backup-disks and similar hardware. +# "filesystem" is the mount-point where the filesystem is mounted, e.g. +# "/usr" or "/home". A filesystem-name that begins with "%" is interpreted +# as a Perl-compatible regular expression; e.g. "%^/oracle.*/" will match +# any filesystem whose mountpoint begins with "/oracle". +# Defaults: warnlevel=90%, paniclevel=95% +# +# MEMPHYS warnlevel paniclevel +# MEMACT warnlevel paniclevel +# MEMSWAP warnlevel paniclevel +# If the memory utilization exceeds the "warnlevel" or "paniclevel", the +# "memory" status will change to yellow or red, respectively. +# Note: The words "PHYS", "ACT" and "SWAP" are also recognized. +# Defaults: MEMPHYS warnlevel=100 paniclevel=101 (i.e. it will never go red) +# MEMSWAP warnlevel=50 paniclevel=80 +# MEMACT warnlevel=90 paniclevel=97 +# +# PROC processname minimumcount maximumcount color [TRACK=id] [TEXT=displaytext] +# The "ps" listing sent by the client will be scanned for how many +# processes containing "processname" are running, and this is then +# matched against the min/max settings defined here. If the running +# count is outside the thresholds, the color of the "procs" status +# changes to "color". +# To check for a process that must NOT be running: Set minimum and +# maximum to 0. +# +# "processname" can be a simple string, in which case this string must +# show up in the "ps" listing as a command. The scanner will find +# a ps-listing of e.g. "/usr/sbin/cron" if you only specify "processname" +# as "cron". +# "processname" can also be a Perl-compatiable regular expression, e.g. +# "%java.*inst[0123]" can be used to find entries in the ps-listing for +# "java -Xmx512m inst2" and "java -Xmx256 inst3". In that case, +# "processname" must begin with "%" followed by the reg.expression. +# If "processname" contains whitespace (blanks or TAB), you must enclose +# the full string in double quotes - including the "%" if you use regular +# expression matching. E.g. +# PROC "%xymond_channel --channel=data.*xymond_rrd" 1 1 yellow +# or +# PROC "java -DCLASSPATH=/opt/java/lib" 2 5 +# +# You can have multiple "PROC" entries for the same host, all of the +# checks are merged into the "procs" status and the most severe +# check defines the color of the status. +# +# The TRACK=id option causes the number of processes found to be recorded +# in an RRD file, with "id" as part of the filename. This graph will then +# appear on the "procs" page as well as on the "trends" page. Note that +# "id" must be unique among the processes tracked for each host. +# +# The TEXT=displaytext option affects how the process appears on the +# "procs" status page. By default, the process is listed with the +# "processname" as identification, but if this is a regular expression +# it may be a bit difficult to understand. You can then use e.g. +# "TEXT=Apache" to make these processes appear with the name "Apache" +# instead. +# +# Defaults: mincount=1, maxcount=-1 (unlimited), color="red". +# Note: No processes are checked by default. +# +# Example: Check that "cron" is running: +# PROC cron +# Example: Check that at least 5 "httpd" processes are running, but +# not more than 20: +# PROC httpd 5 20 +# +# LOG filename match-pattern [COLOR=color] [IGNORE=ignore-pattern] [TEXT=displaytext] +# In the "client-local.cfg" file, you can list any number of files +# that the client will collect log data from. These are sent to the +# Xymon server together with the other client data, and you can then +# choose how to analyze the log data with LOG entries. +# +# ************ IMPORTANT *************** +# To monitor a logfile, you *MUST* configure both client-local.cfg +# and analysis.cfg. If you configure only the client-local.cfg +# file, the client will collect the log data and you can view it in +# the "client data" display, but it will not affect the color of the +# "msgs" status. On the other hand, if you configure only the +# analysis.cfg file, then there will be no log data to inspect, +# and you will not see any updates of the "msgs" status either. +# +# "filename" is a filename or pattern. The set of files reported by +# the client is matched against "filename", and if they match then +# this LOG entry is processed against the data from a file. +# +# "match-pattern": The log data is matched against this pattern. If +# there is a match, this log file causes a status change to "color". +# +# "ignore-pattern": The log data that matched "match-pattern" is also +# matched against "ignore-pattern". If the data matches the "ignore-pattern", +# this line of data does not affect the status color. In other words, +# the "ignore-pattern" can be used to refine the strings which cause +# a match. +# Note: The "ignore-pattern" is optional. +# +# "color": The color which this match will trigger. +# Note: "color" is optional, if omitted then "red" will be used. +# +# Example: Go yellow if the text "WARNING" shows up in any logfile. +# LOG %.* WARNING COLOR=yellow +# +# Example: Go red if the text "I/O error" or "read error" appears. +# LOG %/var/(adm|log)/messages %(I/O|read).error COLOR=red +# +# FILE filename [color] [things to check] [TRACK] +# NB: The files you wish to monitor must be listed in a "file:..." +# entry in the client-local.cfg file, in order for the client to +# report any data about them. +# +# "filename" is a filename or pattern. The set of files reported by +# the client is matched against "filename", and if they match then +# this FILE entry is processed against the data from that file. +# +# [things to check] can be one or more of the following: +# - "NOEXIST" triggers a warning if the file exists. By default, +# a warning is triggered for files that have a FILE entry, but +# which do not exist. +# - "TYPE=type" where "type" is one of "file", "dir", "char", "block", +# "fifo", or "socket". Triggers warning if the file is not of the +# specified type. +# - "OWNERID=owner" and "GROUPID=group" triggers a warning if the owner +# or group does not match what is listed here. "owner" and "group" is +# specified either with the numeric uid/gid, or the user/group name. +# - "MODE=mode" triggers a warning if the file permissions are not +# as listed. "mode" is written in the standard octal notation, e.g. +# "644" for the rw-r--r-- permissions. +# - "SIZE<max.size" and "SIZE>min.size" triggers a warning it the file +# size is greater than "max.size" or less than "min.size", respectively. +# You can append "K" (KB), "M" (MB), "G" (GB) or "T" (TB) to the size. +# If there is no such modifier, KB is assumed. +# E.g. to warn if a file grows larger than 1MB (1024 KB): "SIZE<1M". +# - "SIZE=size" triggers a warning it the file size is not what is listed. +# - "MTIME>min.mtime" and "MTIME<max.mtime" checks how long ago the file +# was last modified (in seconds). E.g. to check if a file was updated +# within the past 10 minutes (600 seconds): "MTIME<600". Or to check +# that a file has NOT been updated in the past 24 hours: "MTIME>86400". +# - "MTIME=timestamp" checks if a file was last modified at "timestamp". +# "timestamp" is a unix epoch time (seconds since midnight Jan 1 1970 UTC). +# - "CTIME>min.ctime", "CTIME<max.ctime", "CTIME=timestamp" acts as the +# mtime checks, but for the ctime timestamp (when the files' directory +# entry was last changed, eg. by chown, chgrp or chmod). +# - "MD5=md5sum", "SHA1=sha1sum", "RMD160=rmd160sum" trigger a warning +# if the file checksum using the MD5, SHA1 or RMD160 message digest +# algorithms do not match the one configured here. Note: The "file" +# entry in the client-local.cfg file must specify which algorithm to use. +# +# "TRACK" causes the size of this file to be tracked in an RRD file, and +# shown on the graph on the "files" display. +# +# Example: Check that the /var/log/messages file is not empty and was updated +# within the past 10 minutes, and go yellow if either fails: +# FILE /var/log/messages SIZE>0 MTIME<600 yellow +# +# Example: Check the timestamp, size and SHA-1 hash of the /bin/sh program: +# FILE /bin/sh MTIME=1128514608 SIZE=645140 SHA1=5bd81afecf0eb93849a2fd9df54e8bcbe3fefd72 +# +# DIR directory [color] [SIZE<maxsize] [SIZE>minsize] [TRACK] +# NB: The directories you wish to monitor must be listed in a "dir:..." +# entry in the client-local.cfg file, in order for the client to +# report any data about them. +# +# "directory" is a filename or pattern. The set of directories reported by +# the client is matched against "directory", and if they match then +# this DIR entry is processed against the data for that directory. +# +# "SIZE<maxsize" and "SIZE>minsize" defines the size limits that the +# directory must stay within. If it goes outside these limits, a warning +# will trigger. Note the Xymon uses the raw number reported by the +# local "du" command on the client. This is commonly KB, but it may be +# disk blocks which are often 512 bytes. +# +# "TRACK" causes the size of this directory to be tracked in an RRD file, +# and shown on the graph on the "files" display. +# +# PORT [LOCAL=addr] [EXLOCAL=addr] [REMOTE=addr] [EXREMOTE=addr] [STATE=state] [EXSTATE=state] [MIN=mincount] [MAX=maxcount] [COLOR=color] [TRACK=id] [TEXT=displaytext] +# The "netstat" listing sent by the client will be scanned for how many +# sockets match the criteria listed. +# "addr" is a (partial) address specification in the format used on +# the output from netstat. This is typically "10.0.0.1:80" for the IP +# 10.0.0.1, port 80. Or "*:80" for any local address, port 80. +# NB: The Xymon clients normally report only the numeric data for +# IP-adresses and port-numbers, so you must specify the port +# number (e.g. "80") instead of the service name ("www"). +# "state" causes only the sockets in the specified state to be included; +# it is usually LISTEN or ESTABLISHED. +# The socket count is then matched against the min/max settings defined +# here. If the count is outside the thresholds, the color of the "ports" +# status changes to "color". +# To check for a socket that must NOT exist: Set minimum and +# maximum to 0. +# +# "addr" and "state" can be a simple strings, in which case these string must +# show up in the "netstat" at the appropriate column. +# "addr" and "state" can also be a Perl-compatiable regular expression, e.g. +# "LOCAL=%(:80|:443)" can be used to find entries in the netstat local port for +# both http (port 80) and https (port 443). In that case, portname or state must +# begin with "%" followed by the reg.expression. +# +# The TRACK=id option causes the number of sockets found to be recorded +# in an RRD file, with "id" as part of the filename. This graph will then +# appear on the "ports" page as well as on the "trends" page. Note that +# "id" must be unique among the ports tracked for each host. +# +# The TEXT=displaytext option affects how the port appears on the +# "ports" status page. By default, the port is listed with the +# local/remote/state rules as identification, but this may be somewhat +# difficult to understand. You can then use e.g. "TEXT=Secure Shell" to make +# these ports appear with the name "Secure Shell" instead. +# +# Defaults: state="LISTEN", mincount=1, maxcount=-1 (unlimited), color="red". +# Note: No ports are checked by default. +# +# Example: Check that there is someone listening on the https port: +# PORT "LOCAL=%([.:]443)$" state=LISTEN TEXT=https +# +# Example: Check that at least 5 "ssh" connections are established, but +# not more than 10; warn but do not error; graph the connection count: +# PORT "LOCAL=%([.:]22)$" state=ESTABLISHED min=5 max=20 color=yellow TRACK=ssh "TEXT=SSH logins" +# +# Example: Check that ONLY ports 22, 80 and 443 are open for incoming connections: +# PORT STATE=LISTEN LOCAL=%0.0.0.0[.:].* EXLOCAL=%[.:](22|80|443)$ MAX=0 "TEXT=Bad listeners" +# +# +# To apply rules to specific hosts, you can use the "HOST=", "EXHOST=", "PAGE=" +# "EXPAGE=", "CLASS=" or "EXCLASS=" qualifiers. (These act just as in the +# alerts.cfg file). +# +# Hostnames are either a comma-separated list of hostnames (from the hosts.cfg file), +# "*" to indicate "all hosts", or a Perl-compatible regular expression. +# E.g. "HOST=dns.foo.com,www.foo.com" identifies two specific hosts; +# "HOST=%www.*.foo.com EXHOST=www-test.foo.com" matches all hosts with a name +# beginning with "www", except the "www-test" host. +# "PAGE" and "EXPAGE" match the hostnames against the page on where they are +# located in the hosts.cfg file, via the hosts' page/subpage/subparent +# directives. This can be convenient to pick out all hosts on a specific page. +# +# Rules can be dependant on time-of-day, using the standard Xymon syntax +# (the hosts.cfg(5) about the NKTIME parameter). E.g. "TIME=W:0800:2200" +# applied to a rule will make this rule active only on week-days between +# 8AM and 10PM. +# +# You can also associate a GROUP id with a rule. The group-id is passed to +# the alert module, which can then use it to control who gets an alert when +# a failure occurs. E.g. the following associates the "httpd" process check +# with the "web" group, and the "sshd" check with the "admins" group: +# PROC httpd 5 GROUP=web +# PROC sshd 1 GROUP=admins +# In the alerts.cfg file, you could then have rules like +# GROUP=web +# MAIL webmaster@foo.com +# GROUP=admins +# MAIL root@foo.com +# +# Qualifiers must be placed after each rule, e.g. +# LOAD 8.0 12.0 HOST=db.foo.com TIME=*:0800:1600 +# +# If you have multiple rules that you want to apply the same qualifiers to, +# you can write the qualifiers *only* on one line, followed by the rules. E.g. +# HOST=%db.*.foo.com TIME=W:0800:1600 +# LOAD 8.0 12.0 +# DISK /db 98 100 +# PROC mysqld 1 +# will apply the three rules to all of the "db" hosts on week-days between 8AM +# and 4PM. This can be combined with per-rule qualifiers, in which case the +# per-rule qualifier overrides the general qualifier; e.g. +# HOST=%.*.foo.com +# LOAD 7.0 12.0 HOST=bax.foo.com +# LOAD 3.0 8.0 +# will result in the load-limits being 7.0/12.0 for the "bax.foo.com" host, +# and 3.0/8.0 for all other foo.com hosts. +# +# The special DEFAULT section can modify the built-in defaults - this must +# be placed at the end of the file. + + +HOST=_MASTERBACKEND_ + PROC sshd 1 + PROC lighttpd + PROC hobbitd + PROC crond + PROC mysql + PROC mythbackend + DISK * 95 99 + PORT "LOCAL=%([.:]80)$" state=LISTEN TEXT=http + PORT "LOCAL=%([.:]1337)$" state=LISTEN TEXT=cgi + PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh + +HOST=_MASTERFRONTEND_ + PROC mythfrontend + PROC sshd 1 + PROC lighttpd + PROC hobbitd + PROC crond + PROC mysql + PROC mythbackend + DISK * 95 99 + PORT "LOCAL=%([.:]80)$" state=LISTEN TEXT=http + PORT "LOCAL=%([.:]1337)$" state=LISTEN TEXT=cgi + PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh + PORT "LOCAL=%([.:]5000)$" state=LISTEN TEXT=ghosd + PORT "LOCAL=%([.:]5001)$" state=LISTEN TEXT=ghosd + + +HOST=_FRONTEND_ + PROC sshd 1 + PROC crond + PROC mythfrontend + PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh + PORT "LOCAL=%([.:]5000)$" state=LISTEN TEXT=ghosd + PORT "LOCAL=%([.:]5001)$" state=LISTEN TEXT=ghosd + +HOST=_SLAVEBACKEND_ + PROC sshd 1 + PROC crond + PROC mythbackend + DISK * 95 99 + PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh + +HOST=_SLAVEFRONTEND_ + PROC sshd 1 + PROC lighttpd + PROC hobbitd + PROC crond + PROC mythbackend + PROC mythfrontend + DISK * 95 99 + PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh + PORT "LOCAL=%([.:]5000)$" state=LISTEN TEXT=ghosd + PORT "LOCAL=%([.:]5001)$" state=LISTEN TEXT=ghosd + +HOST=_STANDALONE_ + PROC sshd 1 + PROC lighttpd + PROC hobbitd + PROC crond + PROC mysql + PROC mythbackend + DISK * 95 99 + PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh + PORT "LOCAL=%([.:]5000)$" state=LISTEN TEXT=ghosd + PORT "LOCAL=%([.:]5001)$" state=LISTEN TEXT=ghosd + PORT "LOCAL=%([.:]80)$" state=LISTEN TEXT=http + PORT "LOCAL=%([.:]1337)$" state=LISTEN TEXT=cgi + PORT "LOCAL=%([.:]22)$" state=LISTEN TEXT=ssh + +DEFAULT + # These are the built-in defaults. + UP 1h + LOAD 5.0 10.0 + DISK * 99 100 + MEMPHYS 100 101 + MEMSWAP 50 80 + MEMACT 90 97 + |