/usr/bin/sga-bam2de is in sga 0.10.15-3.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 | #! /usr/bin/perl
use strict;
use Getopt::Long;
my $n = 5;
my $k = 99;
my $minLength = 200;
my $prefix = "";
my $numThreads = 1;
my $mind = -99; # minimum gap size to pass to abyss
my $mina = 100; # minimum alignment length to pass to abyss
my $help = ''; # (false)
# Filter the abyss distance est histogram to remove insert sizes
# with fewer than hist_min data points
my $hist_min = 3;
GetOptions("prefix=s" => \$prefix,
"k=i" => \$k,
"n=i" => \$n,
"m=i" => \$minLength,
"mind=i" => \$mind,
"mina=i" => \$mina,
"t=i" => \$numThreads,
"help" => \$help);
if($help)
{
usage();
exit(0);
}
checkDependency("abyss-fixmate");
checkDependency("DistanceEst");
my $bFail = 0;
if($prefix eq "")
{
print("Error: A prefix must be specified\n");
$bFail = 1;
}
my $bamFile = $ARGV[0];
if($bamFile eq "")
{
print "Error: A bam file must be provided\n";
$bFail = 1;
}
if($minLength <= 0)
{
print "Error: minimum contig length (-m) must be greater than 0\n";
$bFail = 1;
}
if($bFail)
{
usage();
exit(1);
}
my $cmd;
# fix-mate info
$cmd = "abyss-fixmate -h $prefix.tmp.hist $bamFile | samtools view -Sb - > $prefix.diffcontigs.bam";
runCmd($cmd);
# DistanceEst is very slow when the learned insert size distribution is very wide
# To work around this, remove singleton data points from the histogram
runCmd("awk \'\$2 >= $hist_min\' $prefix.tmp.hist > $prefix.hist");
# sort
$cmd = "samtools sort -\@$numThreads $prefix.diffcontigs.bam -o $prefix.diffcontigs.sorted.bam";
runCmd($cmd);
# distance est
my $mind_opt = "--mind $mind";
$cmd = "DistanceEst -s $minLength $mind_opt -n $n -k $k -j $numThreads -o $prefix.de $prefix.hist -l $mina $prefix.diffcontigs.sorted.bam";
runCmd($cmd);
sub usage
{
print "sga-bam2de.pl - Make a distance estimate file from a bam file of reads aligned to contigs\n";
print "sga-bam2de.pl -n N --prefix lib300 lib300.bam\n";
print "Options:\n";
print " -n N Minimum number of pairs required to consider two contigs linked\n";
print " -m LEN Only find links between contigs with length at least LEN bp (default: 200)\n";
print " -t NUM Use NUM threads for computing the distance estimates\n";
print " --prefix NAME Use NAME as the prefix for the outfiles\n";
print " --mind D Set the minimum distance estimate to test to be D. This should be a negative\n";
print " number if contigs are expected to overlap. Defaults to -99bp.\n";
print " --mina N Set the minimum alignment length to be N.\n";
print " --help This help output.\n";
}
sub runCmd
{
my($c) = @_;
print "$c\n";
system($c);
}
# Check whether the programs are found and executable
sub checkDependency
{
while(my $program = shift) {
my $ret = system("/bin/bash -c \"hash $program\"");
if($ret != 0) {
print STDERR "Could not find program $program. Please install it or update your PATH.\n";
print STDERR "Return: $ret\n";
exit(1);
}
}
}
|