/usr/share/check_mk/checks/df.include is in check-mk-server 1.2.2p3-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2013             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation in version 2. check_mk is distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
# out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more de-
# tails. You should have received a copy of the GNU General Public
# License along with GNU Make; see the file COPYING. If not, write
# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301 USA.
# Common include file for all filesystem checks (df, df_netapp, hr_fs, ...)
# Settings for filesystem checks (df, df_vms, df_netapp and maybe others)
# Legacy setting that older configurations may still assign.
filesystem_levels = [] # obsolete. Just here to check config and warn if changed
# Site-wide default levels. get_filesystem_levels() accepts either a
# dictionary (merged over the factory settings) or an old-style tuple
# (warn, crit[, magic]).
filesystem_default_levels = {} # can also be dropped some day in future
# Filesystems to ignore (shouldn't be sent by agent anyway)
# NOTE(review): neither exclude list is referenced in this include file;
# presumably the individual filesystem checks consult them - confirm.
inventory_df_exclude_fs = [ 'nfs', 'smbfs', 'cifs', 'iso9660' ]
inventory_df_exclude_mountpoints = [ '/dev' ]
# Grouping of filesystems into groups that are monitored as one entity.
# Each rule starts with a list of (group name, mount point pattern) pairs,
# optionally followed by a list of host tags, and ends with a host list.
# The patterns are shell-style globs; df_inventory() and
# df_check_filesystem_list() match them with fnmatch.
# Example:
# filesystem_groups = [
#   ( [ ( "Storage pool", "/data/pool*" ) ], [ 'linux', 'prod' ], ALL_HOSTS ),
#   ( [ ( "Backup space 1", "/usr/backup/*.xyz" ),
#       ( "Backup space 2", "/usr/backup2/*.xyz" ) ], ALL_HOSTS ),
# ]
filesystem_groups = []
# Alternative syntax for parameters (a dictionary instead of a tuple):
# { "levels"         : (80, 90),  # levels in percent
#   "magic"          : 0.5,       # magic factor
#   "magic_normsize" : 20,        # normsize in GB
#   "levels_low"     : (50, 60),  # magic never lowers levels below this (percent)
#   "trend_range"    : 24,        # hours
#   "trend_mb"       : (10, 20),  # MB of change during trend_range
#   "trend_perc"     : (1, 2),    # Percent change during trend_range
#   "trend_timeleft" : (72, 48)   # run time left in hours until full
# }
# Built-in defaults. get_filesystem_levels() starts from a copy of this
# dictionary and lets filesystem_default_levels and the check parameters
# override individual keys.
factory_settings["filesystem_default_levels"] = {
    "levels" : (80, 90), # warn/crit in percent
    "magic_normsize" : 20, # Standard size is 20 GB
    "levels_low" : (50, 60), # Never move warn level below 50% due to magic factor
    "trend_range" : 24, # hours over which the trend is averaged
    "trend_perfdata" : True, # do send performance data for trends
}
def df_inventory(mplist):
    """Build the inventory for a list of mount points.

    mplist -- iterable of mount point names found on the host

    Returns a list of (item, params) tuples: one entry with empty
    parameters for every mount point that matches no filesystem group,
    and one entry per group that matched at least one mount point. The
    group entries carry the group's patterns in params["patterns"].
    """
    import fnmatch

    def matches_any(path, globs):
        # True as soon as one of the shell-style patterns fits
        for glob in globs:
            if fnmatch.fnmatch(path, glob):
                return True
        return False

    # Collect the patterns configured for this host, keyed by group name
    patterns_by_group = {}
    for rule_value in host_extra_conf(g_hostname, filesystem_groups):
        for name, glob in rule_value:
            if name not in patterns_by_group:
                patterns_by_group[name] = []
            patterns_by_group[name].append(glob)

    services = []
    matched_groups = set()
    for mountpoint in mplist:
        # A mount point may belong to several groups at once
        hits = [name for name, globs in patterns_by_group.items()
                if matches_any(mountpoint, globs)]
        if hits:
            matched_groups.update(hits)
        else:
            # Not part of any group: monitor it as a service of its own
            services.append((mountpoint, {}))

    # Only groups that actually contain a filesystem become services
    for name in matched_groups:
        services.append((name, { "patterns" : patterns_by_group[name] }))
    return services
# Users might have set filesystem_default_levels to old format like (80, 90)
# needed by df, df_netapp and vms_df and maybe others in future:
# compute warning and critical levels. Takes into account the size of
# the filesystem and the magic number. Since the size is only known at
# check time this function's result cannot be precompiled.
# Note: this function is in our days only needed in order to support
# old style params a'la (80, 90). As soon as we drop support for that
# (can happen any decade now), we can get rid of this function.
def get_filesystem_levels(host, mountpoint, size_gb, params):
    """Compute the effective parameters and levels for one filesystem.

    host        -- host name (not used by this function)
    mountpoint  -- mount point or group name (not used by this function)
    size_gb     -- size of the filesystem in GB
    params      -- check parameters, either a dictionary or an old-style
                   tuple (warn, crit[, magic])

    Returns a dictionary made of the factory settings, overridden by
    filesystem_default_levels and then by params, with two computed keys:
    "levels_mb" (warn/crit as absolute MB values) and "levels_text" (a
    human readable description of the percentage levels in effect).
    """
    # Start with factory settings
    levels = factory_settings["filesystem_default_levels"].copy()

    # Merge the user's default levels. These may still be configured in
    # the old style, e.g. (80, 90) or (80, 90, 0.5)
    if isinstance(filesystem_default_levels, dict):
        levels.update(filesystem_default_levels)
    else:
        levels["levels"] = filesystem_default_levels[:2]
        if len(filesystem_default_levels) == 2:
            levels["magic"] = None
        else:
            levels["magic"] = filesystem_default_levels[2]

    # If params is a dictionary, make that override the default values
    if isinstance(params, dict):
        levels.update(params)
    else: # simple format - explicitly override levels and magic
        levels["levels"] = params[:2]
        if len(params) >= 3:
            levels["magic"] = params[2]

    warn, crit = levels["levels"]

    # If the magic factor is used, take disk size and magic factor
    # into account in order to move levels. A filesystem of size 0 cannot
    # be scaled (the scale factor would be 0/0), so the plain levels are
    # used in that case.
    magic = levels.get("magic")
    if magic and size_gb > 0:
        normsize = levels["magic_normsize"]
        hgb_size = size_gb / float(normsize)
        felt_size = hgb_size ** magic
        scale = felt_size / hgb_size
        warn_scaled = 100 - (( 100 - warn ) * scale)
        crit_scaled = 100 - (( 100 - crit ) * scale)

        # Make sure, levels do never get too low due to magic factor
        lowest_warning_level, lowest_critical_level = levels["levels_low"]
        if warn_scaled < lowest_warning_level:
            warn_scaled = lowest_warning_level
        if crit_scaled < lowest_critical_level:
            crit_scaled = lowest_critical_level
    else:
        warn_scaled = warn
        crit_scaled = crit

    # Convert the percentage levels into absolute values in MB
    size_mb = size_gb * 1024
    warn_mb = savefloat(size_mb * warn_scaled / 100)
    crit_mb = savefloat(size_mb * crit_scaled / 100)
    levels["levels_mb"] = (warn_mb, crit_mb)
    levels["levels_text"] = "(levels at %.1f/%.1f%%)" % (warn_scaled, crit_scaled)
    return levels
# Legacy function for checks that do not support groups yet
def df_check_filesystem(hostname, mountpoint, size_mb, avail_mb, params):
    """Check a single filesystem (entry point for checks without groups).

    Wraps the values into a one-element filesystem list and hands it to
    df_check_filesystem_list() with the mount point as item. The hostname
    argument is accepted but not used.
    """
    only_entry = (mountpoint, size_mb, avail_mb)
    return df_check_filesystem_list(mountpoint, params, [only_entry])
# New function for checks that support groups.
def df_check_filesystem_list(item, params, fslist):
    """Check one filesystem or one filesystem group.

    item    -- mount point, or the group name if params contains "patterns"
    params  -- check parameters; the key "patterns" marks a group check
    fslist  -- list of (mountpoint, size_mb, avail_mb) tuples

    Returns the result of df_check_filesystem_single() - for groups with
    the number of filesystems appended to the text - or a (3, text) tuple
    if nothing in fslist matches.
    """
    if "patterns" in params:
        import fnmatch
        patterns = params["patterns"]
        count = 0
        total_size = 0
        total_avail = 0
        for mp, size_mb, avail_mb in fslist:
            for pattern in patterns:
                if fnmatch.fnmatch(mp, pattern):
                    count += 1
                    total_size += size_mb
                    total_avail += avail_mb
                    break # count each filesystem only once

        # If no filesystem has been found we cannot do the
        # actual check since the size is zero.
        if count == 0:
            return (3, "UNKNOWN - No filesystem matching the patterns")

        result = df_check_filesystem_single(g_hostname, item, total_size, total_avail, params)
        # The result carries no performance data when the total size is 0,
        # so do not rely on it having three elements.
        status, infotext = result[0], result[1]
        infotext += " (%d filesystems)" % count
        return (status, infotext) + tuple(result[2:])
    else:
        for mp, size_mb, avail_mb in fslist:
            if mp == item:
                return df_check_filesystem_single(g_hostname, mp, size_mb, avail_mb, params)
        return (3, "UNKNOWN - filesystem not found")
def df_check_filesystem_single(hostname, mountpoint, size_mb, avail_mb, params):
    """Check usage and growth trend of one filesystem (or summed group).

    hostname    -- accepted for compatibility; the global g_hostname is
                   what gets passed on to get_filesystem_levels()
    mountpoint  -- mount point or group name; also used to name the
                   counters and the main performance variable
    size_mb     -- total size in MB
    avail_mb    -- available space in MB
    params      -- check parameters (dictionary or old-style tuple)

    Returns (status, text, perfdata). If size_mb is 0 a two-element tuple
    (1, text) is returned instead.
    """
    if size_mb == 0:
        return (1, "WARN - size of filesystem is 0 MB")

    used_mb = size_mb - avail_mb
    used_perc = 100.0 * (float(used_mb) / size_mb)
    size_gb = size_mb / 1024.0

    # Get warning and critical levels already with 'magic factor' applied.
    # The magic factor is optional. A factor of 1.0 changes nothing. Factor
    # should be > 0 and <= 1. A smaller factor raises levels for file systems
    # bigger than "magic_normsize" (20 GB in the factory settings) and lowers
    # them for smaller ones.
    levels = get_filesystem_levels(g_hostname, mountpoint, size_gb, params)
    warn_mb, crit_mb = levels["levels_mb"]

    # TODO: In some future version use a fixed name as perf variable
    perf_var = mountpoint.replace(" ", "_")
    perfdata = [(perf_var, str(used_mb) + 'MB', warn_mb, crit_mb, 0, size_mb)]
    infotext = "%.1f%% used (%.2f of %.1f GB), %s" % \
               (used_perc, used_mb / 1024.0, size_gb, levels["levels_text"])

    # Trends. The trends are computed in two steps. In the first step we
    # compute the delta to the last check, using a normal check_mk counter.
    # In the second step we compute an average over that counter and can
    # make a long-time prediction. If we have that values we can optionally
    # apply levels.
    # Trend parameters:
    # "trend_range"    : 24,       # hours
    # "trend_mb"       : (10, 20), # MB of change during trend_range
    # "trend_perc"     : (1, 2),   # Percent change during trend_range
    # "trend_timeleft" : (72, 48)  # run time left in hours until full
    status = 0
    problems = []

    MB = 1024 * 1024.0
    H24 = 60 * 60 * 24

    if levels.get("trend_range"):
        try:
            trend_range = levels["trend_range"] # in hours
            range_sec = trend_range * 3600.0
            this_time = time.time()

            # first compute current rate in MB/s by computing delta since last check
            timedif, rate = get_counter("df.%s.delta" % mountpoint, this_time, used_mb, True)
            if levels.get("trend_perfdata"):
                # Change in 1.1.13i3: The trend perfdata always outputs
                # the growth in MB/24h, not any longer in MB/trendrange
                perfdata.append(("growth", rate * H24))

            # average trend, initialize with zero, rate_avg is in MB/s
            timedif, rate_avg = get_average("df.%s.trend" % mountpoint,
                                            this_time, rate, range_sec / 60.0, True)

            # rate_avg is growth in MB/s, trend is in MB per trend range hours
            trend = rate_avg * range_sec
            sign = ""
            if trend > 0:
                sign = "+"
            infotext += ", trend: %s%s / %g hours" % \
                        (sign, get_bytes_human_readable(trend * MB), trend_range)

            # levels for performance data (in MB per trend range)
            warn_perf, crit_perf = None, None

            # apply levels for absolute growth in MB / interval
            trend_mb = levels.get("trend_mb")
            if trend_mb:
                wa, cr = trend_mb
                warn_perf, crit_perf = wa, cr
                if trend >= wa:
                    problems.append("growing too fast (levels at %s/%s per %.1fh)!" %
                        ( get_bytes_human_readable(wa * MB), get_bytes_human_readable(cr * MB), trend_range))
                    status = max(1, status)
                    if trend >= cr:
                        status = 2
                        problems[-1] += "!"

            # apply levels for growth relative to filesystem size
            trend_perc = levels.get("trend_perc")
            if trend_perc:
                wa_perc, cr_perc = trend_perc
                wa = wa_perc / 100.0 * size_mb
                cr = cr_perc / 100.0 * size_mb
                # If both kinds of levels are set, report the stricter ones
                if warn_perf is not None:
                    warn_perf = min(warn_perf, wa)
                    crit_perf = min(crit_perf, cr)
                else:
                    warn_perf, crit_perf = wa, cr
                if trend >= wa:
                    problems.append("growing too fast (levels at %.3f%%/%.3f%% per %.1fh)!" %
                        ( wa_perc, cr_perc, trend_range))
                    status = max(1, status)
                    if trend >= cr:
                        status = 2
                        problems[-1] += "!"

            if levels.get("trend_perfdata"):
                # New in 1.1.13i3: output trend not as MB / trend_range, but as
                # MB / 24 hours. The same holds for the warn and crit information.
                # It is configured in MB / trend range but in the performance data
                # it's sent as MB / 24h.
                # Only a missing level is sent as None; a level of 0 is a
                # valid value and must be kept.
                warn_perf_24h = None
                crit_perf_24h = None
                if warn_perf is not None:
                    warn_perf_24h = warn_perf / range_sec * H24
                if crit_perf is not None:
                    crit_perf_24h = crit_perf / range_sec * H24
                perfdata.append(("trend", rate_avg * H24,
                                 warn_perf_24h, crit_perf_24h,
                                 0, size_mb / trend_range))

            # compute time until filesystem is full (only for positive trend, of course)
            if trend > 0:
                space_left = size_mb - used_mb
                hours_left = space_left / trend * trend_range
                timeleft = levels.get("trend_timeleft")
                if timeleft:
                    wa, cr = timeleft
                    if hours_left <= cr:
                        status = 2
                        problems.append("only %.1fh until disk full!!" % hours_left)
                    elif hours_left <= wa:
                        status = max(status, 1)
                        problems.append("only %.1fh until disk full!" % hours_left)
                    elif hours_left <= wa * 2:
                        # purely informational, does not change the status
                        problems.append("time left: %.1fh" % hours_left)

        except MKCounterWrapped:
            # need more data for computing a trend
            # In this case erase all perfdata to prevent an rrd file which has no space
            # for the trend information
            perfdata = []

    # The usage levels are applied on top of whatever the trend found
    if used_mb >= crit_mb:
        status = 2
    elif used_mb >= warn_mb:
        status = max(1, status)

    if problems:
        infotext += " - " + ", ".join(problems)
    return (status, nagios_state_names[status] + " - " + infotext, perfdata)
|