/usr/share/doc/watchdog/examples/another-chance.sh is in watchdog 5.14-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#!/bin/sh
# This is a "repair binary" for watchdog that allows the tests to fail N times
# within a given period before a reboot is called. Note that this "grace
# period" should really be a functionality of watchdog itself, IMHO.
#
# Erik Rossen <rossen@prolibre.com>
#
# Usage: another-chance.sh ERRCODE
#   ERRCODE  first argument; presumably the error number handed over by
#            watchdog (see watchdog(8)). It becomes this script's exit
#            status once N failures have been counted.
#
# Exit status:
#   0        fewer than N failures are on record (give it another chance)
#   ERRCODE  N failures reached, or the failure count could not be stored

# If one does not change the default watchdog loop time of 10 seconds, N=12
# will allow two minutes of failures before a reboot is signaled.
N=12
# CMAXAGE is the age in seconds that the counter file may have before it is
# considered too old and is wiped out.
CMAXAGE=20
COUNTER=/var/run/watchdog.counter

# A missing or non-numeric argument must not turn the final "exit" into a
# bare "exit" (which would return the status of the preceding command,
# usually 0, and thereby cancel the reboot). Fall back to a generic failure.
ERR=${1:-1}
case "$ERR" in
  *[!0-9]*) ERR=1 ;;
esac

# Load the number of failures seen so far, discarding a stale counter file.
I=0
if test -f "$COUNTER"; then
  COUNTERAGE=$(stat -c %Y "$COUNTER")
  NOW=$(date +%s)
  # If stat failed, COUNTERAGE is empty: treat it as 0 so the file is
  # considered stale and removed.
  if test $((${COUNTERAGE:-0} + CMAXAGE)) -lt "$NOW"; then
    rm -f "$COUNTER"
  else
    I=$(cat "$COUNTER")
  fi
fi

# An empty or corrupted counter file (non-digits, or a leading zero that the
# shell would parse as octal) restarts the count instead of crashing the
# arithmetic below.
case "$I" in
  '' | *[!0-9]* | 0?*) I=0 ;;
esac
I=$((I + 1))

# Record the failure together with a process snapshot for post-mortem use.
logger -t "watchdog[$$]" "Failure $I of $N"
logger -t "watchdog[$$]" "PROCESS LIST:"
ps auxww | logger -t "watchdog[$$]"

if test "$I" -ge "$N"; then
  logger -t "watchdog[$$]" "Too many failures. Signalling reboot."
  # -f: the file may legitimately be absent (e.g. N=1 on the first failure).
  rm -f "$COUNTER"
  exit "$ERR"
fi

# Persist the count for the next invocation. If that is impossible the grace
# period cannot be tracked at all, so do not mask the failure from watchdog.
if ! echo "$I" > "$COUNTER"; then
  logger -t "watchdog[$$]" "Cannot write $COUNTER. Signalling reboot."
  exit "$ERR"
fi
exit 0
|