/usr/bin/slon_watchdog is in slony1-2-bin 2.0.7-3build1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/perl
#
# Author: Christopher Browne
# Copyright 2004-2009 Afilias Canada
use Getopt::Long;
# Defaults.  These are package globals (not lexicals) so that they remain
# visible to the rest of the script and to any file it later require()s.
$CONFIG_FILE = '/etc/slony1/slon_tools.conf';
$SHOW_USAGE  = 0;

# Usage text.  It is emitted through die(), so it goes to STDERR and the
# script exits with a non-zero status, both for --help and for bad arguments.
# It is declared before option parsing because the parse step reports
# failures with it.
my $USAGE =
"Usage: slon_watchdog [--config file] node# sleep_seconds
--config file Location of the slon_tools.conf file
sleep_seconds Number of seconds for the watchdog process to sleep
between checks
";

# Read command-line options.  GetOptions() returns false when it meets an
# unknown or malformed option (it prints its own diagnostic first); treat
# that as a usage error instead of silently starting the watchdog with a
# command line that was not understood.
GetOptions("config=s" => \$CONFIG_FILE,
           "help"     => \$SHOW_USAGE)
    or die $USAGE;

# After option processing exactly two positional arguments must remain:
# the node and the sleep interval.
if ($SHOW_USAGE or scalar(@ARGV) != 2) {
    die $USAGE;
}
# Load the shared Slony tools library and then the site configuration file.
# NOTE(review): presumably these provide get_pid(), start_slon() and the
# $LOGDIR / $CLUSTER_NAME / @DBNAME globals used by the main loop -- confirm
# against slon-tools.pm and the configuration file.
require '/usr/share/slony1/slon-tools.pm';
require $CONFIG_FILE;

# Positional arguments: the node to watch and the base sleep interval.
$node  = $ARGV[0];
$sleep = $ARGV[1];

# The node may be given either as a bare number ("1") or as "node1"; the
# numeric part is kept separately in $nodenum.  \A and \z anchor the whole
# argument, so a trailing newline is not accepted.
if ($node =~ /\A(?:node)?(\d+)\z/) {
    $nodenum = $1;
}
else {
    die $USAGE;
}

# The main loop sleeps for $sleep + (rand($sleep) - $sleep/2) seconds.  With
# a zero or non-numeric interval that expression is below one second, which
# sleep() truncates to zero, so the watchdog would spin without pausing.
# Insist on a plain number of at least one second.
if ($sleep !~ /\A\d+(?:\.\d+)?\z/ or $sleep < 1) {
    die $USAGE;
}
# Append the given strings to the file at $path.
#
# The file is opened in append mode and closed again around every write, so
# the text is on disk before any child process appends to the same file, and
# no handle stays open while the watchdog sleeps.  A failure to open or close
# is reported on STDERR but is not fatal: the watchdog must keep running even
# when it cannot log.
#
# Returns 1 on success, 0 on failure.
sub _watchdog_append {
    my ($path, @lines) = @_;

    my $fh;
    if (!open($fh, '>>', $path)) {
        warn "slon_watchdog: cannot append to $path: $!\n";
        return 0;
    }
    print {$fh} @lines;
    if (!close($fh)) {
        warn "slon_watchdog: error closing $path: $!\n";
        return 0;
    }
    return 1;
}

# Main watchdog loop: runs forever.  On every pass it asks get_pid() whether
# a slon is running for the node; if not, it logs the fact, restarts the node
# through slonik and start_slon(), and logs the outcome.  Otherwise it writes
# a heartbeat entry to slon_watchdog.log.  It then sleeps for a randomised
# interval around $sleep seconds.
while (1) {
    # $pid is deliberately left as a package global, as in the original.
    $pid = get_pid($node);

    if (!$pid) {
        my $dbname  = $DBNAME[$nodenum];
        my $logfile = "$LOGDIR/slon-$dbname-$node.err";

        _watchdog_append($logfile,
            "WATCHDOG: No Slon is running for node $node!\n",
            "WATCHDOG: You ought to check the postmaster and slon for evidence of a crash!\n",
            "WATCHDOG: I'm going to restart slon for $node...\n");

        # First, restart the node using slonik.  The generated slonik script
        # is piped straight into slonik; stdout and stderr of the pipeline
        # are both appended to the node's error log.
        my $generator = '/usr/bin/slonik_restart_node';
        if ($CONFIG_FILE ne "") {
            $generator .= " --config=${CONFIG_FILE}";
        }
        my $status = system "($generator $node | /usr/bin/slonik) >> $logfile 2>> $logfile";
        if ($status != 0) {
            # Record the failure instead of ignoring it; the restart of the
            # slon process below is still attempted.
            _watchdog_append($logfile,
                "WATCHDOG: slonik restart command failed (wait status $status)\n");
        }

        # Next, restart the slon process to service the node
        start_slon($nodenum);
        $pid = get_pid($node);
        if ($pid) {
            _watchdog_append($logfile,
                "WATCHDOG: Restarted slon for the $CLUSTER_NAME cluster, PID $pid\n");
        }
        else {
            # Do not claim success when no process can be found afterwards.
            _watchdog_append($logfile,
                "WATCHDOG: Restart attempted for the $CLUSTER_NAME cluster, but no slon PID was found\n");
        }
    }
    else {
        my $watchlog = "$LOGDIR/slon_watchdog.log";

        # Blank separator line, then a timestamp written by date(1), then
        # the status lines -- the same order and content as before.
        _watchdog_append($watchlog, "\n");
        system "date >> $watchlog";
        _watchdog_append($watchlog,
            "Found slon daemon running for the $CLUSTER_NAME cluster, PID $pid\n",
            "Looks Ok\n",
            "Sleeping for $sleep +/- " . int($sleep/2) . " seconds\n");
    }

    # NOTE(review): PSOUT is never opened in this script; presumably it is a
    # handle used inside get_pid().  The close is kept from the original in
    # case that helper leaves it open -- confirm and remove if redundant.
    close(PSOUT);

    # Sleep between $sleep/2 and 3*$sleep/2 seconds; the random jitter means
    # the checks do not always land on the same schedule.
    sleep $sleep + (rand($sleep) - $sleep/2);
}
|