/usr/share/check_mk/checks/drbd is in check-mk-server 1.2.2p3-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# | ____ _ _ __ __ _ __ |
# | / ___| |__ ___ ___| | __ | \/ | |/ / |
# | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
# | | |___| | | | __/ (__| < | | | | . \ |
# | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
# | |
# | Copyright Mathias Kettner 2013 mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation in version 2. check_mk is distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
# out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more de-
# tails. You should have received a copy of the GNU General Public
# License along with GNU Make; see the file COPYING. If not, write
# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301 USA.
# Author: Lars Michelsen <lm@mathias-kettner.de>
# Example outputs from agent:
#
# While syncing:
# <<<drbd>>>
# version: 8.3.8 (api:88/proto:86-94)
# GIT-hash: d78846e52224fd00562f7c225bcc25b2d422321d build by cssint@erzc20, 2010-06-17 14:47:26
# 0: cs:SyncSource ro:Primary/Secondary ds:UpToDate/Inconsistent C r----
# ns:12031428 nr:0 dw:12031364 dr:1175992347 al:2179 bm:71877 lo:37 pe:0 ua:37 ap:0 ep:1 wo:b oos:301729988
# [=======>............] sync'ed: 42.4% (294656/510908)M delay_probe: 145637
# finish: 1:23:28 speed: 60,172 (51,448) K/sec
#
# Sync stalled:
# <<<drbd>>>
# b01srv05:~ # cat /proc/drbd
# version: 8.3.8 (api:88/proto:86-94)
# GIT-hash: d78846e52224fd00562f7c225bcc25b2d422321d build by cssint@erzc20, 2010-06-17 14:47:26
# 0: cs:SyncSource ro:Primary/Secondary ds:UpToDate/Inconsistent C r----
# ns:11545876 nr:0 dw:11545900 dr:954551211 al:1955 bm:58360 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:523171100
# [>....................] sync'ed: 0.1% (510908/510908)M delay_probe: 135599
# stalled
#
# Synced:
# <<<drbd>>>
# version: 8.3.8 (api:88/proto:86-94)
# GIT-hash: d78846e52224fd00562f7c225bcc25b2d422321d build by cssint@erzc20, 2010-06-17 14:47:26
# 0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r----
# ns:12227928 nr:0 dw:12227864 dr:1477722351 al:2300 bm:90294 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
# Description of the /proc/drbd output:
# http://www.drbd.org/users-guide/ch-admin.html#s-proc-drbd
#
# The information from /proc/drbd is grouped as follows (extracted from the doc above)
#
# General:
# cs (connection state). Status of the network connection. See the section called
# “Connection states” for details about the various connection states.
# Available States:
# StandAlone. No network configuration available. The resource has not yet been connected,
# or has been administratively disconnected (using drbdadm disconnect),
# or has dropped its connection due to failed authentication or split brain.
# Disconnecting. Temporary state during disconnection. The next state is StandAlone.
# Unconnected. Temporary state, prior to a connection attempt.
# Possible next states: WFConnection and WFReportParams.
# Timeout. Temporary state following a timeout in the communication with the peer. Next state: Unconnected.
# BrokenPipe. Temporary state after the connection to the peer was lost. Next state: Unconnected.
# NetworkFailure. Temporary state after the connection to the partner was lost. Next state: Unconnected.
# ProtocolError. Temporary state after the connection to the partner was lost. Next state: Unconnected.
# TearDown. Temporary state. The peer is closing the connection. Next state: Unconnected.
# WFConnection. This node is waiting until the peer node becomes visible on the network.
# WFReportParams. TCP connection has been established, this node waits for the first network packet from the peer.
# Connected. A DRBD connection has been established, data mirroring is now active. This is the normal state.
# StartingSyncS. Full synchronization, initiated by the administrator, is just starting.
# The next possible states are: SyncSource or PausedSyncS.
# StartingSyncT. Full synchronization, initiated by the administrator, is just starting. Next state: WFSyncUUID.
# WFBitMapS. Partial synchronization is just starting. Next possible states: SyncSource or PausedSyncS.
# WFBitMapT. Partial synchronization is just starting. Next possible state: WFSyncUUID.
# WFSyncUUID. Synchronization is about to begin. Next possible states: SyncTarget or PausedSyncT.
# SyncSource. Synchronization is currently running, with the local node being the source of synchronization.
# SyncTarget. Synchronization is currently running, with the local node being the target of synchronization.
# PausedSyncS. The local node is the source of an ongoing synchronization, but synchronization is currently paused.
# This may be due to a dependency on the completion of another synchronization process,
# or due to synchronization having been manually interrupted by drbdadm pause-sync.
# PausedSyncT. The local node is the target of an ongoing synchronization, but synchronization
# is currently paused. This may be due to a dependency on the completion of another
# synchronization process, or due to synchronization having been manually interrupted by drbdadm pause-sync.
# VerifyS. On-line device verification is currently running, with the local node being the source of verification.
# VerifyT. On-line device verification is currently running, with the local node being the target of verification.
#
# ro (roles). Roles of the nodes. The role of the local node is displayed first, followed by the role of the partner
# node shown after the slash. See the section called “Resource roles” for details about the possible resource roles.
# Available Roles:
# Primary. The resource is currently in the primary role, and may be read from and written to.
# This role only occurs on one of the two nodes, unless dual-primary mode is enabled.
# Secondary. The resource is currently in the secondary role. It normally receives updates
# from its peer (unless running in disconnected mode), but may neither be read from
# nor written to. This role may occur on one node or both nodes.
# Unknown. The resource's role is currently unknown. The local resource role never has this status.
# It is only displayed for the peer's resource role, and only in disconnected mode.
#
# ds (disk states). State of the hard disks. Prior to the slash the state of the local node is displayed,
# after the slash the state of the hard disk of the partner node is shown.
# See the section called “Disk states” for details about the various disk states.
# Disk States:
# Diskless. No local block device has been assigned to the DRBD driver. This may mean that the resource
# has never attached to its backing device, that it has been manually detached using drbdadm detach
# or that it automatically detached after a lower-level I/O error.
# Attaching. Transient state while reading meta data.
# Failed. Transient state following an I/O failure report by the local block device. Next state: Diskless.
# Negotiating. Transient state when an Attach is carried out on an already-connected DRBD device.
# Inconsistent. The data is inconsistent. This status occurs immediately upon creation of a new resource,
# on both nodes (before the initial full sync). Also, this status is found in one node
# (the synchronization target) during synchronization.
# Outdated. Resource data is consistent, but outdated.
# DUnknown. This state is used for the peer disk if no network connection is available.
# Consistent. Consistent data of a node without connection. When the connection
# is established, it is decided whether the data are UpToDate or Outdated.
# UpToDate. Consistent, up-to-date state of the data. This is the normal state.
#
# Network:
# ns (network send). Volume of net data sent to the partner via the network connection; in Kibyte.
# nr (network receive). Volume of net data received by the partner via the network connection; in Kibyte.
# Disk:
# dw (disk write). Net data written on local hard disk; in Kibyte.
# dr (disk read). Net data read from local hard disk; in Kibyte.
# Stats:
# al (activity log). Number of updates of the activity log area of the meta data.
# bm (bit map). Number of updates of the bitmap area of the meta data.
# lo (local count). Number of open requests to the local I/O sub-system issued by DRBD.
# pe (pending). Number of requests sent to the partner, but that have not yet been answered by the latter.
# ua (unacknowledged). Number of requests received by the partner via the network connection, but that have not yet been answered.
# ap (application pending). Number of block I/O requests forwarded to DRBD, but not yet answered by DRBD.
# ep (epochs). Number of epoch objects. Usually 1. Might increase under I/O load
# when using either the barrier or the none write ordering method. Since 8.2.7.
# wo (write order). Currently used write ordering method: b (barrier), f (flush), d (drain) or n (none). Since 8.2.7.
# oos (out of sync). Amount of storage currently out of sync; in Kibibytes. Since 8.2.6.
# Default thresholds for drbd checks. Every slot is None; the stats tuple
# has one slot per statistics field (nine in total).
drbd_net_default_levels = (None, None)
drbd_disk_default_levels = (None, None)
drbd_stats_default_levels = (None,) * 9

# A device block in the agent output starts with "<minor number>:"
drbd_block_start_match = re.compile('^[0-9]+:')

# Keys that each sub-check extracts from a device block. The connection
# state 'cs' is part of every list.
drbd_general_map = ['cs', 'ro', 'ds']
drbd_net_map = ['cs', 'ns', 'nr']
drbd_disk_map = ['cs', 'dw', 'dr']
drbd_stats_map = ['cs', 'al', 'bm', 'lo', 'pe', 'ua', 'ap', 'ep', 'wo', 'oos']

# Connection state -> state number reported by the general check
drbd_cs_map = {
    # 0: normal operation and online verification
    'Connected': 0,
    'VerifyS': 0,
    'VerifyT': 0,
    # 1: administratively disconnected, handshake or (paused) synchronization
    'StandAlone': 1,
    'Disconnecting': 1,
    'WFReportParams': 1,
    'StartingSyncS': 1,
    'StartingSyncT': 1,
    'WFBitMapS': 1,
    'WFBitMapT': 1,
    'WFSyncUUID': 1,
    'SyncSource': 1,
    'SyncTarget': 1,
    'PausedSyncS': 1,
    'PausedSyncT': 1,
    # 2: connection to the peer is lost or not yet established
    'Unconnected': 2,
    'Timeout': 2,
    'BrokenPipe': 2,
    'NetworkFailure': 2,
    'ProtocolError': 2,
    'TearDown': 2,
    'WFConnection': 2,
}
def inventory_drbd(info, checktype):
    """Build the inventory for one of the drbd sub-checks.

    info      -- agent output as a list of lines, each line a list of
                 tokens; the first two lines are skipped
    checktype -- 'drbd', 'drbd.net', 'drbd.disk' or 'drbd.stats'

    Returns a list of (item, params) tuples with item being
    'drbd<minor number>'. For the 'drbd' check the roles and disk states
    found right now are stored as the expected values; for the other
    check types the name of the matching default levels variable is used.
    Devices in connection state 'Unconfigured' are left out.
    """
    # Names of the default level variables of the remaining sub-checks
    default_levels = {
        'drbd.net':   "drbd_net_default_levels",
        'drbd.disk':  "drbd_disk_default_levels",
        'drbd.stats': "drbd_stats_default_levels",
    }

    inventory = []
    for line in info[2:]:
        # Test the match object for truth. Ordering it against an integer
        # ("search(...) > 0") only works by accident on Python 2 and raises
        # a TypeError on Python 3.
        if not drbd_block_start_match.search(line[0]):
            continue

        # The first token is "<minor>:", strip the colon for the item name
        item = 'drbd%s' % line[0][:-1]
        parsed = drbd_parse_block(drbd_extract_block(item, info), checktype)

        # Skip unconfigured drbd devices
        if parsed['cs'] == 'Unconfigured':
            continue

        if checktype == 'drbd':
            levels = '( [ "%s", "%s" ], [ "%s", "%s" ] )' % \
                     (parsed['ro'][0], parsed['ro'][1],
                      parsed['ds'][0], parsed['ds'][1])
        else:
            levels = default_levels[checktype]
        inventory.append((item, levels))
    return inventory
def drbd_parse_block(block, to_parse):
    """Turn the raw lines of one device block into a dictionary.

    block    -- list of lines, each line a list of "key:value" tokens
    to_parse -- check type; decides which keys are picked up

    Tokens without a colon and keys that the given check type does not
    ask for are ignored. For the 'drbd' check the values of 'ro' and 'ds'
    are split at '/' into a list (local side first, peer second); every
    other value is kept as a string.
    """
    result = {}
    for tokens in block:
        for token in tokens:
            pieces = token.split(':')
            if len(pieces) < 2:
                continue
            key = pieces[0]
            value = pieces[1]

            # Only parse the requested information depending on the check
            # to be executed now
            if to_parse == 'drbd':
                wanted = drbd_general_map
            elif to_parse == 'drbd.net':
                wanted = drbd_net_map
            elif to_parse == 'drbd.disk':
                wanted = drbd_disk_map
            elif to_parse == 'drbd.stats':
                wanted = drbd_stats_map
            else:
                continue

            if key not in wanted:
                continue
            if to_parse == 'drbd' and key in ('ro', 'ds'):
                value = value.split('/')
            result[key] = value
    return result
def drbd_extract_block(item, info):
    """Return the raw lines of the agent output that belong to one device.

    item -- device name in the form 'drbd<minor number>'
    info -- agent output as a list of lines, each line a list of tokens

    The block starts at the line whose first token (without its last
    character) matches the item and ends before the next line that starts
    another device block. An empty list is returned if the device is not
    found.
    """
    block = []
    in_block = False
    # Ignore the first two lines since they contain drbd version information
    for line in info[2:]:
        if "drbd" + line[0][:-1] == item:
            in_block = True
        elif in_block and drbd_block_start_match.search(line[0]):
            # Another block starts. So the requested block is finished.
            # The match object is tested for truth; ordering it against an
            # integer raises a TypeError on Python 3. A further "!= item"
            # test is not needed here since the branch above handles "==".
            break

        # Lines in front of the wanted block are skipped
        if in_block:
            block.append(line)
    return block
def drbd_get_block(item, info, checktype):
    """Extract and parse the block of one device.

    Returns the dictionary produced by drbd_parse_block() for the given
    check type, or None if the agent output has no block for the item.
    """
    lines = drbd_extract_block(item, info)
    if not lines:
        return None
    return drbd_parse_block(lines, checktype)
def check_drbd_general(item, params, info):
    """Check connection state, roles and disk states of one drbd device.

    item   -- device name in the form 'drbd<minor number>'
    params -- pair (expected roles, expected disk states); each entry is a
              two element list (local, peer) or None to skip the comparison
    info   -- agent output as a list of lines, each line a list of tokens

    Returns a tuple (state, text). The state starts with the value that
    drbd_cs_map assigns to the connection state and is raised to 2 if the
    roles or the disk states differ from the expected ones.
    """
    parsed = drbd_get_block(item, info, 'drbd')
    if parsed is None:
        return (3, "UNKNOWN - Undefined state")

    # A block without any connection state is reported as UNKNOWN instead
    # of failing with a KeyError
    cs = parsed.get('cs')
    if cs == 'Unconfigured':
        return (2, 'CRIT - The device is "Unconfigured"')
    if cs not in drbd_cs_map:
        return (3, 'UNKNOWN - Undefined "connection state" in drbd output')

    # Weight of connection state is calculated by the drbd_cs_map.
    # The roles and disk states are calculated using the expected values
    state = drbd_cs_map[cs]
    output = 'Connection State: %s' % cs

    for label, key, expected in [('Roles', 'ro', params[0]),
                                 ('Disk States', 'ds', params[1])]:
        current = parsed[key]
        output += ', %s: %s/%s' % (label, current[0], current[1])
        if expected is not None and current != expected:
            state = max(state, 2)
            output += ' (Expected: %s/%s)' % (expected[0], expected[1])

    return (state, "%s - %s" % (nagios_state_names[state], output))
def drbd_get_counters(list):
    """Convert a list of counter values into rates.

    list -- tuples of (counter prefix, name, item, value, unit)

    Returns (text, perfdata): text holds one " <name>/sec: <rate><unit>"
    part per evaluated counter, perfdata one (name, rate) tuple each.
    When get_counter() raises MKCounterWrapped the perfdata collected so
    far is dropped and the remaining counters are not evaluated; the text
    built up to that point is kept.
    """
    now = time.time()
    text_parts = []
    perfdata = []
    for prefix, name, item, value, uom in list:
        counter_name = "%s.%s.%s" % (prefix, name, item)
        try:
            _elapsed, rate = get_counter(counter_name, now, value)
        except MKCounterWrapped:
            perfdata = []
            break
        perfdata.append((name, rate))
        text_parts.append(' %s/sec: %s%s' % (name, rate, uom))
    return (''.join(text_parts), perfdata)
def check_drbd_net(item, params, info):
    """Report the network receive ('nr') and send ('ns') rates of a device.

    Returns (state, text, perfdata). The state is 3 if the device is
    missing in the agent output, 2 if it is "Unconfigured" and 0
    otherwise. params is not evaluated.
    """
    parsed = drbd_get_block(item, info, 'drbd.net')
    if parsed is None:
        return (3, "UNKNOWN - Undefined state")
    if parsed['cs'] == 'Unconfigured':
        return (2, 'CRIT - The device is "Unconfigured"')

    counters = [
        ('drbd.net', 'in', item, int(parsed['nr']), 'kb'),
        ('drbd.net', 'out', item, int(parsed['ns']), 'kb'),
    ]
    output, perfdata = drbd_get_counters(counters)
    # FIXME: Maybe handle thresholds in the future
    return (0, "OK -%s" % output, perfdata)
def check_drbd_disk(item, params, info):
    """Report the disk write ('dw') and read ('dr') rates of a device.

    Returns (state, text, perfdata). The state is 3 if the device is
    missing in the agent output, 2 if it is "Unconfigured" and 0
    otherwise. params is not evaluated.
    """
    parsed = drbd_get_block(item, info, 'drbd.disk')
    if parsed is None:
        return (3, "UNKNOWN - Undefined state")
    if parsed['cs'] == 'Unconfigured':
        return (2, 'CRIT - The device is "Unconfigured"')

    counters = [
        ('drbd.disk', 'write', item, int(parsed['dw']), 'kb'),
        ('drbd.disk', 'read', item, int(parsed['dr']), 'kb'),
    ]
    output, perfdata = drbd_get_counters(counters)
    # FIXME: Maybe handle thresholds in the future
    return (0, "OK -%s" % output, perfdata)
def check_drbd_stats(item, params, info):
    """Report the statistics fields of one drbd device.

    Returns (state, text, perfdata). The state is 3 if the device is
    missing in the agent output, 2 if it is "Unconfigured" and 0
    otherwise. params is not evaluated.

    The text lists every statistics field found in the block. The
    perfdata contains one entry per numeric field; a missing field is
    reported as 0 there. The write order ('wo') is a letter and therefore
    only shown in the text, never in the perfdata.
    """
    parsed = drbd_get_block(item, info, 'drbd.stats')
    if parsed is None:
        return (3, "UNKNOWN - Undefined state")
    if parsed['cs'] == 'Unconfigured':
        return (2, 'CRIT - The device is "Unconfigured"')

    details = []
    perfdata = []
    for key, label in [ ('al', 'activity log updates'), ('bm', 'bit map updates'),
                        ('lo', 'local count requests'), ('pe', 'pending requests'),
                        ('ua', 'unacknowledged requests'), ('ap', 'application pending requests'),
                        ('ep', 'epoch objects'), ('wo', 'write order'),
                        ('oos', 'kb out of sync') ]:
        if key in parsed:
            value = parsed[key]
            details.append('%s: %s' % (label, value))
        else:
            value = 0 # perfdata must always have same number of entries

        # 'wo' holds a letter (e.g. "wo:b"), which is no valid performance
        # value. It is left out unconditionally so that the number of
        # perfdata entries stays the same on every run.
        if key != 'wo':
            perfdata.append((label.replace(" ", "_"), value))

    return (0, 'OK - ' + ', '.join(details), perfdata)
# Registration of the four sub-checks. Each entry is a tuple of the check
# function, the service description template, the number 1 and a function
# that runs the inventory for that check type.
# NOTE(review): the third field is presumably the "has perfdata" flag of
# the check_info tuple format - confirm against the Check_MK version in use.
check_info['drbd'] = (
    check_drbd_general,
    "DRBD %s status",
    1,
    lambda info: inventory_drbd(info, "drbd"),
)
check_info['drbd.net'] = (
    check_drbd_net,
    "DRBD %s net",
    1,
    lambda info: inventory_drbd(info, "drbd.net"),
)
check_info['drbd.disk'] = (
    check_drbd_disk,
    "DRBD %s disk",
    1,
    lambda info: inventory_drbd(info, "drbd.disk"),
)
check_info['drbd.stats'] = (
    check_drbd_stats,
    "DRBD %s stats",
    1,
    lambda info: inventory_drbd(info, "drbd.stats"),
)
|