/usr/bin/mash2matrix is in seer 1.1.2-3.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | #!/usr/bin/perl -w
use strict;
use warnings;
# Help text shown on STDERR when no readable input file is given.
# The heredoc terminator is single-quoted on purpose: the text contains the
# shell command  tr '\n' ' '  and an interpolating heredoc would turn that
# `\n` into a real line break, printing a command that cannot be copied.
my $usage_message = <<'USAGE';
Usage: mash2matrix <matrix_in> > all_distances.csv
Uses mash (http://mash.readthedocs.io/) to create all pairwise distances
between samples.
NB: the input to mash should be the same order as the .pheno file
generate <assemblies.fa> with:
cut -f 1 metadata.pheno | tr '\n' ' '
Then run
mash sketch -o reference <assemblies.fa>
mash dist reference.msh reference.msh > mash_distances.txt
./mash2matrix.pl mash_distances.txt > all_distances.csv
USAGE
# Main program: turn a long-format distance listing into a comma-separated
# matrix on STDOUT.
#
# Input  : path in $ARGV[0]. Each line is split on tabs; the first three
#          fields are taken as <name 1>, <name 2>, <distance>. Any further
#          fields are ignored.
# Output : one CSV row per distinct name, rows and columns both ordered by
#          the first appearance of each name in the input. A pair that never
#          appears in the input is written as an empty field.
# Exit   : 0 on success; 1 (after printing the usage text to STDERR) when the
#          argument is missing or does not exist; dies if the file cannot be
#          opened.
my $dist_file = $ARGV[0];

# Check definedness first so that running without arguments shows the usage
# text instead of an "uninitialized value" warning from -e.
if (!defined($dist_file) || !-e $dist_file)
{
    print STDERR $usage_message;
    exit(1);
}

# Three-argument open with a lexical handle: the file name is never parsed
# for mode characters, and the handle is scoped to this script body.
open(my $input_fh, '<', $dist_file)
    || die("Could not open $dist_file: $!\n");

my %row_idx;          # name -> matrix index, assigned in order of appearance
my @mat_out;          # $mat_out[row][col] = distance
my $num_samples = 0;  # next free index == number of distinct names seen

while (my $line_in = <$input_fh>)
{
    chomp $line_in;
    next if $line_in eq '';

    my ($el1, $el2, $dist) = split(/\t/, $line_in);
    if (!defined($dist))
    {
        # Fewer than three fields: registering the names would add a bogus
        # row/column, so report the line and move on.
        print STDERR "Skipping malformed line $. of $dist_file\n";
        next;
    }

    foreach my $name ($el1, $el2)
    {
        if (!defined($row_idx{$name}))
        {
            $row_idx{$name} = $num_samples++;
        }
    }

    $mat_out[$row_idx{$el1}][$row_idx{$el2}] = $dist;
}
close($input_fh);

# Size the matrix from the number of names seen, not from the length of
# row 0: this keeps an empty input from dereferencing an undefined row and
# keeps every row present (and equally long) when some pairs are missing.
foreach my $row (0 .. $num_samples - 1)
{
    my @cells;
    foreach my $col (0 .. $num_samples - 1)
    {
        my $cell = $mat_out[$row][$col];
        push(@cells, defined($cell) ? $cell : '');
    }
    print join(",", @cells) . "\n";
}

exit(0);
|