/usr/lib/oar/finaud is in oar-server 2.5.7-3.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 | #!/usr/bin/perl
# $Id$
# This program automatically changes the state of nodes when they become inaccessible or when they come back alive
use OAR::IO;
use OAR::Modules::Judas qw(oar_debug oar_warn oar_error set_current_log_category);
use OAR::PingChecker qw(test_hosts);
use OAR::Conf qw(init_conf dump_conf get_conf is_conf);
use Data::Dumper;
use strict;
use IO::Socket::INET;
# Report uninitialized values and similar mistakes in the logic below.
use warnings;

# Log category
set_current_log_category('main');
oar_debug("[finaud] Finaud started\n");
oar_debug("[finaud] Check Alive and Suspected nodes\n");

my $base = OAR::IO::connect();
my @node_list_tmp = OAR::IO::get_finaud_nodes($base);

# Read from the configuration whether nodes that are running jobs have to be
# checked as well. The default is 'no'; any other configured value makes
# finaud test every node returned by get_finaud_nodes().
init_conf($ENV{OARCONFFILE});
my $check_occupied_nodes = 'no';
if (is_conf("CHECK_NODES_WITH_RUNNING_JOB")) {
    $check_occupied_nodes = get_conf("CHECK_NODES_WITH_RUNNING_JOB");
}
my $skip_occupied_nodes = ($check_occupied_nodes eq 'no');

# The currently assigned nodes are only fetched when they serve as a filter.
my $Occupied_nodes;
if ($skip_occupied_nodes) {
    $Occupied_nodes = OAR::IO::get_current_assigned_nodes($base);
}

# Index the candidate nodes by network address, leaving out the occupied
# ones when they must not be checked.
my %Nodes_hash;
foreach my $node (@node_list_tmp) {
    my $address = $node->{network_address};
    next if ($skip_occupied_nodes and defined($Occupied_nodes->{$address}));
    $Nodes_hash{$address} = $node;
}
my @Nodes_to_check = keys(%Nodes_hash);
oar_debug("[finaud] Testing resource(s) on : @Nodes_to_check\n");

# Call the right program to test each node. Every host returned by
# test_hosts() is treated as failing. The checker is not invoked at all when
# there is nothing to test: the decision loop below would have no node to
# act on anyway.
my %bad_node_hash;
if (@Nodes_to_check) {
    foreach my $bad_host (test_hosts(@Nodes_to_check)) {
        $bad_node_hash{$bad_host} = 1;
    }
}

# Make the decisions:
#   - an Alive node that failed the test gets Suspected as next state;
#   - a Suspected node that passed the test gets Alive as next state.
# Nodes in any other situation are left untouched.
my $return_value = 0;
foreach my $node (values(%Nodes_hash)) {
    my $address = $node->{network_address};
    my $is_bad  = exists($bad_node_hash{$address});

    if ($is_bad and ($node->{state} eq "Alive")) {
        OAR::IO::set_node_nextState($base, $address, "Suspected");
        OAR::IO::update_node_nextFinaudDecision($base, $address, "YES");
        OAR::IO::add_new_event_with_host($base, "FINAUD_ERROR", 0, "Finaud has detected an error on the node", [$address]);
        $return_value = 1;
        oar_debug("[finaud] Set the next state of $address to Suspected\n");
    } elsif (!$is_bad and ($node->{state} eq "Suspected")) {
        OAR::IO::set_node_nextState($base, $address, "Alive");
        OAR::IO::update_node_nextFinaudDecision($base, $address, "YES");
        OAR::IO::add_new_event_with_host($base, "FINAUD_RECOVER", 0, "Finaud has detected that the node comes back", [$address]);
        $return_value = 1;
        oar_debug("[finaud] Set the next state of $address to Alive\n");
    }
}

OAR::IO::disconnect($base);
oar_debug("[finaud] Finaud ended : $return_value\n");

# Exit status: 1 when at least one next-state change was requested, 0 otherwise.
exit($return_value);
|