This file is indexed.

/usr/bin/makeflow_log_parser is in coop-computing-tools 4.0-1ubuntu1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/perl
#Programmer: Andrew Thrasher
#Date: 10/12/2009
#Updated: 9/17/2012, need to deal with the added comments throughout the log for events like aborts and restarts
#Purpose: Parse makeflow log files to generate useful logs

$log = $ARGV[0];

open(LOGFILE, "<", $log);
open(SUMMARY, ">", $log.".summary");
open(OUT, ">", "$log.clean"); 
$line = <LOGFILE>;
#ignore makeflow structure information at the top of the log file and only deal with actual log data
while($line =~ /^#/){
	$line = <LOGFILE>;
}
chomp $line; 
#get start time from the first line
@fields = split(/ /, $line);
$timestart = $fields[0];
$prior_time = $timestart;
$fields[0] = $fields[0] / 1000000; 
foreach $f (@fields){
	print OUT "$f "; 
}
print OUT "\n"; 

$goodput;
$badput;

$num_tasks = $fields[9];
@tasks[$num_tasks]; 
if($fields[2] == 1)
{
	#entered running state, store elapsed time (in this case it is the first entry so no time has elapsed
	$tasks[$fields[1]] = 0; #$fields[0];
}
elsif($fields[2] == 2)
{
	#completed
	$goodput += ($fields[0] - $tasks[$fields[1]]);
}
elsif($fields[2] == 3 || $fields[2] == 4)
{
	#failed or aborted
	$badput += ($fields[0] - $tasks[$fields[1]]);
}
else
{
	#went back to waiting
}


#Need to determine the total CPU time
#use number of tasks as indices to each task
$err = 0;
$totaltime = 0; 
$err_time;  
while($line = <LOGFILE>)
{
	chomp $line; 
	if($line =~ /^#/){
		#we're on a restart, abort, etc. 
		#we need to change the prior time so that we don't count the elapsed time while the makeflow was inactive
		$err = 1; 
		next; 
	}

	@fields = split(/ /, $line);
	$time = $fields[0]; 
	
	if($err){
		#the prior line was an error status
		#therefore we need to reset the prior time
		$err_time += ($time - $prior_time); 
		$last_err = ($time - $prior_time); 
		$err = 0; 
		
	}
	$totaltime += ($time - $prior_time); 

	#fields: timestamp, work item #, new state, jobID (random id returned by worker), # of nodes waiting, # of nodes running, # of nodes completed, # of nodes failed, # of nodes aborted, # of jobs counter

	
	if($fields[2] == 1)
	{
	        #entered running state, store timestamp
		#instead of storing the timestamp, store the elapsed time
		$tasks[$fields[1]] = $totaltime; 
	}
	elsif($fields[2] == 2)
	{
	        #completed
		$goodput += (($totaltime - $tasks[$fields[1]]) - $last_err); 
	}
	elsif($fields[2] == 3 || $fields[2] == 4)
	{
	        #failed or aborted
	        $badput += ($totaltime - $tasks[$fields[1]] - $last_err); 
	}
	else
	{
	        #went back to waiting

	}

	$prior_time = $time; 
	$fields[0] = $fields[0] / 1000000; 
	foreach $f (@fields){
		print OUT "$f "; 
	}
	print OUT "\n"; 


}
$totaltime -= $err_time; 
$totaltime = $totaltime /1000000;
$goodput = $goodput / 1000000; 
$badput = $badput / 1000000;  
print SUMMARY "Elapsed Time: ", int($totaltime/(24*60*60)),"+", ($totaltime/(60*60))%24,":", ($totaltime/60)%60,":", $totaltime%60, "\n";
print SUMMARY "Jobs Submitted: ", $fields[9], "\n";
print SUMMARY "Jobs Completed: ", $fields[6], "\n";
print SUMMARY "Goodput: ", int($goodput/(24*60*60)),"+", ($goodput/(60*60))%24,":", ($goodput/60)%60,":", $goodput%60, "\n";
print SUMMARY "Badput: ",  int($badput/(24*60*60)),"+", ($badput/(60*60))%24,":", ($badput/60)%60,":", $badput%60, "\n";
$cpu = $goodput + $badput;
print SUMMARY "Total CPU time: ",  int($cpu/(24*60*60)),"+", ($cpu/(60*60))%24,":", ($cpu/60)%60,":", $cpu%60, "\n";


open (GNUPLOT, "|gnuplot");
#open (GNUPLOT, ">", "$log.gnuplot"); 
print GNUPLOT <<EOPLOT;
set terminal postscript solid eps 24 color
set size 2.5,1
set output "$log.eps"
set xdata time
set ylabel "Jobs Submitted / Complete"
set y2label "Jobs Running"
set y2tics 
set bmargin 4
set style line 1 
set key outside
set timefmt "%s"
plot "$log.clean" using 1:(\$6+\$7+\$8+\$9) title "Submitted" with lines lw 5, "" using 1:6 title "Running" with lines lw 5, "" using 1:7 title "Complete" with lines lw 5
EOPLOT
close(GNUPLOT);