summaryrefslogtreecommitdiffstats
path: root/abs/core/xymon/xymon-gputemp.sh
blob: f8af6494587a69a66ac9aa40aa95fb80d1de0f4d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/bin/bash



# Values reported whenever no real reading can be taken.
gpu_color="clear"
gpu_temp=0

# Without the nvidia-smi binary there is nothing to measure: send a status
# with the default colour and temperature, then stop.
[ -x /usr/bin/nvidia-smi ] || {

  $XYMON $XYMSRV "status $MACHINE.gputemp $gpu_color $(date)
  Couldn't find nvidia-smi

  GPUTEMP : $gpu_temp

  "
  exit 0
}

# Ask nvidia-smi for its GPU listing (-L); the text is kept in gpu_model and
# used later as the model description.
gpu_model=$(nvidia-smi -L)

# An empty listing is reported with the current colour/temperature values,
# after which the script stops.
if [[ -z $gpu_model ]]; then
  $XYMON $XYMSRV "status $MACHINE.gputemp $gpu_color $(date)
  Couldn't find nVidia card

  GPUTEMP : $gpu_temp

  "
  exit 0
fi

#######################################
# Print the maximum GPU temperature (degrees C) for a model description.
#
# The previous if/elif chain compared `grep -c ...` with `-eq 0`, so a branch
# was taken when the model did NOT match; as a result max_temp was 105 for
# every card and the table below (and the default of 100) was never used.
# The lookup now takes a branch only on a positive, case-insensitive match.
#
# Arguments: $1 - model description (e.g. the output of `nvidia-smi -L`)
# Outputs:   maximum temperature on stdout; 100 if no table entry matches
# Returns:   0 always
#######################################
gpu_max_temp() {
  local model entry

  # Collapse whitespace runs (including newlines of a multi-GPU listing) so
  # the single-space patterns below behave like the old unquoted `echo`.
  model=$(printf '%s' "$1" | tr -s '[:space:]' ' ')

  # Entries are "<extended regex>=<max temp>"; the first match wins.
  # The optional "GT " keeps the original patterns working while also
  # accepting names such as "GeForce GT 430", which is how the comments in
  # the original table refer to these cards.
  # NOTE(review): confirm against real `nvidia-smi -L` output for these cards.
  for entry in \
    'GeForce (GT )?210=105' \
    'GeForce (GT )?220=105' \
    'GeForce (GT )?240=105' \
    'GeForce (GT )?430=98' \
    'GeForce GTS 450=100' \
    'GeForce GT 520=102'
  do
    if printf '%s\n' "$model" | grep -E -i -q -- "${entry%=*}"; then
      printf '%s\n' "${entry##*=}"
      return 0
    fi
  done

  # Assume 100 is the maximum
  printf '100\n'
}

# Determine GPU maximum temperature based on GPU model, then derive the
# thresholds: critical is 5 below the maximum, warning 10 below critical.
max_temp=$(gpu_max_temp "$gpu_model")
crit_temp=$(( max_temp - 5 ))
warn_temp=$(( crit_temp - 10 ))

# Sample the temperature of GPU 0 and send a coloured status message.
if [ -x /usr/bin/nvidia-smi ]; then
  # Keep only the "GPU Current Temp : NN C" line of the temperature report.
  temp=$(nvidia-smi -q -g 0 -d TEMPERATURE \
    | grep -Ei 'GPU Current Temp[[:space:]]+:[[:space:]]+[[:digit:]]+ C')
  # $temp is deliberately left unquoted: the shell flattens it onto a single
  # line before every non-digit character is stripped.
  gpu_temp=$(echo $temp | sed -e 's/[^0-9]*//g')

  # An empty result means no reading was parsed; nothing is sent in that case.
  if [ -n "$gpu_temp" ]; then
    # Start from the worst colour and relax it as the reading clears each
    # threshold: below crit_temp -> yellow, below warn_temp -> green.
    gpu_color=red
    [ "$gpu_temp" -lt "$crit_temp" ] && gpu_color=yellow
    [ "$gpu_temp" -lt "$warn_temp" ] && gpu_color=green

    $XYMON $XYMSRV "status $MACHINE.gputemp $gpu_color $(date)
    GPU model is $gpu_model
    max temp is $max_temp

    GPUTEMP : $gpu_temp
    "
  fi
fi

exit 0