/usr/share/check_mk/modules/prediction.py is in check-mk-server 1.2.6p12-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# | ____ _ _ __ __ _ __ |
# | / ___| |__ ___ ___| | __ | \/ | |/ / |
# | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
# | | |___| | | | __/ (__| < | | | | . \ |
# | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
# | |
# | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation in version 2. check_mk is distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
# out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more de-
# tails. You should have received a copy of the GNU General Public
# License along with GNU Make; see the file COPYING. If not, write
# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301 USA.

# Code for predictive monitoring / anomaly detection

# This module is loaded into the check_mk interpreter, which is expected
# to provide names such as MKGeneralException, pnp_cleanup, rrd_path,
# omd_root, var_dir, g_hostname, g_service_description, opt_debug and
# rrdcached_socket. The standard library imports below are what the code
# in this file itself uses.
import math
import os
import sys
import time

def debug(x):
    import pprint
    pprint.pprint(x)

# Export data from an RRD file. This requires an up-to-date
# version of rrdtool.
def rrd_export(filename, ds, cf, fromtime, untiltime, rrdcached=None):
    # Example of the command being built:
    # rrdtool xport --json -s 1361554418 -e 1361640814 --step 60 DEF:x=/omd/sites/heute/X.rrd:1:AVERAGE XPORT:x:HIRNI
    cmd = "rrdtool xport --json -s %d -e %d --step 60 " % (fromtime, untiltime)
    if rrdcached and os.path.exists(rrdcached):
        cmd += "--daemon '%s' " % rrdcached
    cmd += " DEF:x=%s:%s:%s XPORT:x 2>&1" % (filename, ds, cf)
    # if opt_debug:
    #     sys.stderr.write("Running %s\n" % cmd)
    f = os.popen(cmd)
    output = f.read()
    exit_code = f.close()
    if exit_code:
        raise MKGeneralException("Cannot fetch RRD data: %s" % output)

    # Parse without the json module (it is not always available).
    # Our data begins at "data: [...". The sad thing: names are not
    # quoted here. Don't know why. We fake this by defining variables
    # with matching names, so that eval() can resolve them.
    about = "about"
    meta = "meta"
    start = "start"
    step = "step"
    end = "end"
    legend = "legend"
    data = "data"
    null = None
    # begin = output.index("data:")
    # data_part = output[begin + 5:-2]
    data = eval(output)
    return data["meta"]["step"], [ x[0] for x in data["data"] ]
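
# A minimal usage sketch for rrd_export (path, DS number and timestamps
# are hypothetical):
#
#     step, values = rrd_export("/omd/sites/heute/X.rrd", 1, "AVERAGE",
#                               1361554418, 1361640814)
#     # -> (60, [0.02, 0.04, None, ...])   None marks missing data points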

# Scan a PNP4Nagios XML index file for the <DS> number that belongs to
# the performance variable varname. Falls through (returns None) if the
# variable is not found.
def find_ds_in_pnp_xmlfile(xml_file, varname):
    ds = None
    name = None
    for line in file(xml_file):
        line = line.strip()
        if line.startswith("<DS>"):
            ds = line[4:].split('<')[0]
            if name == varname:
                return int(ds)
        elif line.startswith("<LABEL>"):
            name = line[7:].split('<')[0]
            if ds and name == varname:
                return int(ds)
            else:
                ds = None
        elif line == '<DATASOURCE>':
            ds = None
            name = None

def get_rrd_data(hostname, service_description, varname, cf, fromtime, untiltime):
    global rrdcached_socket
    rrd_base = "%s/%s/%s" % (rrd_path, pnp_cleanup(hostname),
                             pnp_cleanup(service_description))

    # First try PNP storage type MULTIPLE
    rrd_file = rrd_base + "_%s.rrd" % pnp_cleanup(varname)
    ds = 1
    if not os.path.exists(rrd_file):
        # We need to look into the XML file of PNP in order to
        # find the correct DS number.
        xml_file = rrd_base + ".xml"
        if not os.path.exists(xml_file):
            raise MKGeneralException("Cannot do prediction: XML file %s missing" % xml_file)
        rrd_file = rrd_base + ".rrd"
        if not os.path.exists(rrd_file):
            raise MKGeneralException("Cannot do prediction: RRD file missing")

        # Let's parse the XML file in a silly but fast way that does
        # not need any further module.
        ds = find_ds_in_pnp_xmlfile(xml_file, varname)
        if ds is None:
            raise MKGeneralException("Cannot do prediction: variable %s not known" % varname)

    if omd_root and not rrdcached_socket:
        rrdcached_socket = omd_root + "/tmp/run/rrdcached.sock"

    return rrd_export(rrd_file, ds, cf, fromtime, untiltime, rrdcached_socket)
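
# Usage sketch (host and service names hypothetical; assumes either PNP
# storage type MULTIPLE or a resolvable XML index):
#
#     step, values = get_rrd_data("myhost", "CPU load", "load15", "MAX",
#                                 time.time() - 86400, time.time())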

daynames = [ "monday", "tuesday", "wednesday", "thursday",
             "friday", "saturday", "sunday"]

# Check whether a certain time stamp lies within daylight saving time (DST)
def is_dst(timestamp):
    return time.localtime(timestamp).tm_isdst

# Returns the timezone *including* DST shift at a certain point of time
def timezone_at(timestamp):
    if is_dst(timestamp):
        return time.altzone
    else:
        return time.timezone

def group_by_wday(t):
    wday = time.localtime(t).tm_wday
    day_of_epoch, rel_time = divmod(t - timezone_at(t), 86400)
    return daynames[wday], rel_time

def group_by_day(t):
    return "everyday", (t - timezone_at(t)) % 86400

def group_by_day_of_month(t):
    broken = time.localtime(t)
    mday = broken[2]
    return str(mday), (t - timezone_at(t)) % 86400

def group_by_everyhour(t):
    return "everyhour", (t - timezone_at(t)) % 3600

prediction_periods = {
    "wday" : {
        "slice"   : 86400, # 7 slices
        "groupby" : group_by_wday,
        "valid"   : 7,
    },
    "day" : {
        "slice"   : 86400, # 31 slices
        "groupby" : group_by_day_of_month,
        "valid"   : 28,
    },
    "hour" : {
        "slice"   : 86400, # 1 slice
        "groupby" : group_by_day,
        "valid"   : 1,
    },
    "minute" : {
        "slice"   : 3600, # 1 slice
        "groupby" : group_by_everyhour,
        "valid"   : 24,
    },
}

def get_prediction_timegroup(t, period_info):
    # Convert to local timezone
    timegroup, rel_time = period_info["groupby"](t)
    from_time = t - rel_time
    until_time = t - rel_time + period_info["slice"]
    return timegroup, from_time, until_time, rel_time
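
# Example (timestamps hypothetical): with period "wday", a timestamp t
# at Monday 09:30 local time yields
#
#     get_prediction_timegroup(t, prediction_periods["wday"])
#     # -> ("monday", <Monday 00:00>, <Tuesday 00:00>, 34200)
#
# i.e. the group name, the first second of the slice, the first second
# *after* the slice, and the offset of t within the slice (9.5h = 34200s).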

def compute_prediction(pred_file, timegroup, params, period_info, from_time, dsname, cf):
    # Collect all slices back into the past until the time horizon
    # is reached
    begin = from_time
    slices = []
    absolute_begin = from_time - params["horizon"] * 86400

    # The resolutions of the different time ranges differ. We interpolate
    # to the best resolution. We assume that the youngest slice has the
    # finest resolution. We also assume that each step is always divisible
    # by the smallest step.

    # Note: due to the f**king DST, we can have several shifts between
    # DST and non-DST during our computation. We need to compensate for
    # those. DST swaps within slices are being ignored. The DST flag
    # is checked against the beginning of the slice.
    smallest_step = None
    while begin >= absolute_begin:
        tg, fr, un, rel = get_prediction_timegroup(begin, period_info)
        if tg == timegroup:
            step, data = get_rrd_data(g_hostname, g_service_description,
                                      dsname, cf, fr, un-1)
            if smallest_step is None:
                smallest_step = step
            slices.append((fr, step / smallest_step, data))
        begin -= period_info["slice"]

    # Now we have all the RRD data we need. The next step is to consolidate
    # all that data into one new array.
    num_points = len(slices[0][2])
    consolidated = []
    for i in xrange(num_points):
        # print "POINT %d --------------------------------------" % i
        point_line = []
        for from_time, scale, data in slices:
            idx = int(i / float(scale))
            if idx < len(data):
                d = data[idx]
                if d is not None:
                    point_line.append(d)
                # else:
                #     date_str = time.strftime("%Y-%m-%d %H:%M", time.localtime(fr + ((un - fr) * i / float(num_points))))
                #     print "No data for %s / %d/%s/ %.2f " % (date_str, i, float(scale), i/float(scale))

        if point_line:
            average = sum(point_line) / len(point_line)
            consolidated.append([
                average,
                min(point_line),
                max(point_line),
                stdev(point_line, average),
            ])
        else:
            consolidated.append([None, None, None, None])

    result = {
        "num_points" : num_points,
        "step"       : smallest_step,
        "columns"    : [ "average", "min", "max", "stdev" ],
        "points"     : consolidated,
    }
    return result
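
# Shape of the computed prediction (values hypothetical): for a one-day
# slice with one-minute resolution one gets
#
#     {
#         "num_points" : 1440,
#         "step"       : 60,
#         "columns"    : [ "average", "min", "max", "stdev" ],
#         "points"     : [ [0.15, 0.05, 0.42, 0.11],
#                          [None, None, None, None], ... ],
#     }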

# Population standard deviation of point_line around the given average
def stdev(point_line, average):
    return math.sqrt(sum([ (p-average)**2 for p in point_line ]) / len(point_line))

# cf: consolidation function (MAX, MIN, AVERAGE)
# levels_factor: this multiplies all absolute levels. Used for example
# in the cpu.loads check, which multiplies the levels by the number of
# CPU cores.
def get_predictive_levels(dsname, params, cf, levels_factor=1.0):
    # Compute timegroup
    now = time.time()
    period_info = prediction_periods[params["period"]]

    # timegroup:  name of the group, like 'monday' or '12'
    # from_time:  absolute epoch time of the first second of the
    #             current slice
    # until_time: absolute epoch of the first second *not* in the slice
    # rel_time:   seconds offset of now in the current slice
    timegroup, from_time, until_time, rel_time = \
        get_prediction_timegroup(now, period_info)

    # Compute directory for prediction data
    dir = "%s/prediction/%s/%s/%s" % (var_dir, g_hostname,
            pnp_cleanup(g_service_description), pnp_cleanup(dsname))
    if not os.path.exists(dir):
        os.makedirs(dir)

    pred_file = "%s/%s" % (dir, timegroup)
    info_file = pred_file + ".info"

    # Check if we need to (re-)compute the prediction file. This is
    # the case if:
    # - no prediction has been done yet for this time group
    # - the prediction from the last time is outdated
    # - the prediction from the last time was done with other parameters
    try:
        last_info = eval(file(info_file).read())
        for k, v in params.items():
            if last_info.get(k) != v:
                if opt_debug:
                    sys.stderr.write("Prediction parameters have changed.\n")
                last_info = None
                break
    except IOError:
        if opt_debug:
            sys.stderr.write("No previous prediction for group %s available.\n" % timegroup)
        last_info = None

    if last_info and last_info["time"] + period_info["valid"] * period_info["slice"] < now:
        if opt_debug:
            sys.stderr.write("Prediction of %s outdated.\n" % timegroup)
        last_info = None

    if last_info:
        # TODO: faster file format. Binary encoded?
        prediction = eval(file(pred_file).read())
    else:
        # Remove all prediction files that result from other
        # prediction periods. This is e.g. needed if the user switches
        # the parameter from 'wday' to 'day'.
        for f in os.listdir(dir):
            if f.endswith(".info"):
                try:
                    info = eval(file(dir + "/" + f).read())
                    if info["period"] != params["period"]:
                        if opt_debug:
                            sys.stderr.write("Removing obsolete prediction %s\n" % f[:-5])
                        os.remove(dir + "/" + f)
                        os.remove(dir + "/" + f[:-5])
                except:
                    pass

        if opt_debug:
            sys.stderr.write("Computing prediction for time group %s.\n" % timegroup)
        prediction = compute_prediction(pred_file, timegroup, params, period_info, from_time, dsname, cf)

        info = {
            "time"   : now,
            "range"  : (from_time, until_time),
            "cf"     : cf,
            "dsname" : dsname,
            "slice"  : period_info["slice"],
        }
        info.update(params)
        file(info_file, "w").write("%r\n" % info)
        file(pred_file, "w").write("%r\n" % prediction)

    # Find reference value in prediction
    index = int(rel_time / prediction["step"])
    # print "rel_time: %d, step: %d, Index: %d, num_points: %d" % (rel_time, prediction["step"], index, prediction["num_points"])
    # print prediction.keys()
    reference = dict(zip(prediction["columns"], prediction["points"][index]))
    # print "Reference: %s" % reference
    ref_value = reference["average"]
    stdev = reference["stdev"]
    levels = []
    if not ref_value: # No reference data available
        levels = ((None, None), (None, None))
    else:
        for what, sig in [ ( "upper", 1 ), ( "lower", -1 ) ]:
            p = "levels_" + what
            if p in params:
                how, (warn, crit) = params[p]
                if how == "absolute":
                    this_levels = (ref_value + (sig * warn * levels_factor),
                                   ref_value + (sig * crit * levels_factor))
                elif how == "relative":
                    this_levels = (ref_value + sig * (ref_value * warn / 100),
                                   ref_value + sig * (ref_value * crit / 100))
                else: # how == "stdev"
                    this_levels = (ref_value + sig * (stdev * warn),
                                   ref_value + sig * (stdev * crit))

                if what == "upper" and "levels_upper_min" in params:
                    limit_warn, limit_crit = params["levels_upper_min"]
                    this_levels = (max(limit_warn, this_levels[0]),
                                   max(limit_crit, this_levels[1]))
                levels.append(this_levels)
            else:
                levels.append((None, None))
    # print levels
    return ref_value, levels
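
# A minimal usage sketch (all parameter values hypothetical). The params
# dict follows the structure this function reads above: "period" selects
# an entry of prediction_periods, "horizon" is the number of days of
# history, and "levels_upper"/"levels_lower" pick one of the three level
# computations ("absolute", "relative" or "stdev"):
#
#     params = {
#         "period"       : "wday",
#         "horizon"      : 90,
#         "levels_upper" : ("relative", (10.0, 20.0)),  # warn/crit in %
#     }
#     ref, ((warn_up, crit_up), (warn_lo, crit_lo)) = \
#         get_predictive_levels("load15", params, "MAX", levels_factor=4.0)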