/usr/bin/bam_coverage_windows is in libbio-graphics-perl 2.40-2.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | #!/usr/bin/perl -w
use strict;
use List::Util 'sum';
use Getopt::Long;
use vars qw/$start $end $current_chr @scores %seen %highest $win $normal $bam/;
use constant WIN => 25;
# A script to make bam coverage windows in WIG/BED 4 format
# requires that samtools be installed
# This script operates on one bam file at a time. If you are comparing
# across bam files of diffenrent sizes (read numbers), take note
# of the normalization option.
# Sheldon McKay (sheldon.mckay@gmail.com)
BEGIN {
die "samtools must be installed!" unless `which samtools`;
}
GetOptions ("normalize=i" => \$normal,
"bam=s" => \$bam,
"window=i" => \$win);
$bam or usage();
open BAM, "samtools depth $bam |";
$win ||= WIN;
$start = 1;
$end = $win;
my $factor = normalization_factor($normal);
chomp(my $name = `basename $bam .bam`);
print qq(track type=wiggle_0 name="$name" description="read coverage for $bam (window size $win)"\n);
while (<BAM>) {
chomp;
my ($chr,$coord,$score) = split;
$current_chr ||= $chr;
check_sorted($chr,$coord);
if ( $chr ne $current_chr ||
$coord > $end ) {
open_window($chr,$coord);
}
push @scores, $score;
$current_chr = $chr;
}
sub open_window {
my ($chr,$coord) = @_;
if ($chr ne $current_chr) {
$seen{$current_chr}++;
}
# close the last window, if needed
if (@scores > 0) {
close_window();
}
$start = nearest_start($coord);
$end = $start + $win;
}
sub close_window {
my $sum = sum(@scores) or return 0;
my $score = $sum/$win;
$score *= $factor;
print join("\t",$current_chr,$start,$end,$score), "\n";
@scores = ();
exit 0 unless $score;
}
sub nearest_start {
my $start = shift;
return 1 if $start < $win;
while ($start % $win) {
$start--;
}
return $start;
}
sub normalization_factor {
return 1 unless my $nahm = shift;
print STDERR "Calculating total number of reads in $bam\n";
chomp(my $total = `samtools view -c $bam`);
print STDERR "$bam has $total reads\n";
return $total/$nahm;
}
# sanity check for unsorted bam
sub check_sorted {
my ($chr,$coord) = @_;
return 1 if $coord > ($highest{$chr} || 0);
$highest{$chr} = $coord;
return 1 unless $seen{$chr};
die_unsorted($chr,$coord);
}
sub die_unsorted {
my ($chr,$coord) = @_;
die "$chr $coord: $bam does not appear to be sorted\n",
"Please try 'samtools sort' first";
}
sub usage {
die '
Usage: perl bam_coverage_windows.pl -b bamfile -n 10_000_000 -w 25
-b name of bam file to read REQUIRED
-w window size (default 25)
-n normalized read number -- if you will be comparing multiple bam files
select the read number to normalize against.
all counts will be adjusted by a factor of:
actual readnum/normalized read num
'
}
|