/usr/bin/determine-phred is in ea-utils 1.1.2+dfsg-3.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 | #!/usr/bin/perl
use strict;
my $ssiz=7000; # sample size
if ($ARGV[0] =~ /^-[h?]/) {
print "Usage: determine-phred FILE
Reads a sam, fastq or pileup, possibly gzipped and returns the phred-scale,
either 64 or 33, based on a quick scan of the data in the file.
";
exit 0;
}
my $cnt;
my $dphred = 64;
if ($ARGV[0] =~ /\.gz$/) {
$ARGV[0] = "gunzip -c '$ARGV[0]'|";
}
my $qual;
my $comm;
my $fmt;
if (@ARGV > 1) {
my @mult = @ARGV;
for my $f (@mult) {
@ARGV = ($f);
determine();
print "$f\t$dphred\n";
}
} else {
determine();
print "$dphred\n";
}
sub determine {
$_ = <>;
if (/^\@/ && ! /^\@SQ\t/) {
# fastq
scalar <>; # read
$comm = scalar <>; # comment
if (!(substr($comm,0,1) eq '+')) {
die "Unknown file format\n";
}
$qual = <>;
chomp $qual;
$fmt = 'fq';
} elsif (/^\S+\t\d+\t[ACTGN]\t\d+\t\S+\t(\S+)$/i) {
$qual = $1;
$fmt = 'pileup';
} else {
# sam
$fmt = 'sam';
$qual = (split(/\t/, $_))[10];
}
if (!$qual) {
die "Unknown file format\n";
}
my $rc = 1;
while($qual) {
++$rc;
for (my $i =length($qual)/2; $i < length($qual); ++$i) {
if (ord(substr($qual,$i,1)) < 64) {
$dphred = 33;
$cnt=$ssiz; # last
last;
}
}
$qual = '';
last if ++$cnt >= $ssiz; # got enough
if ($fmt eq 'fq') {
# fastq
last if ! scalar <>; # id
last if ! scalar <>; # read
last if ! scalar <>; # comment
$qual = <>;
chomp $qual;
} elsif ($fmt eq 'pileup') {
$qual = (split(/\t/, $_))[5];
} else {
# sam
$qual = (split(/\t/, $_))[10];
}
}
if ($rc < 10) {
$dphred = 33;
}
}
|