/usr/share/circos/tools/matrix/unmatrix is in circos-tools 0.22-2.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 | #!/usr/bin/env perl
=pod
=head1 NAME
unmatrix - convert a matrix into a list
=head1 SYNOPSIS
cat matrix.txt | ./unmatrix [-sort] [-nsort] [-bycol] [-byrow] [-sym] [-delim ,]
=head1 DESCRIPTION
Convert a tabular data file
- a b c
d 1 - 0
e 2 5 2
f 1 2 3
into one in which each table cell is reported individually on separate lines.
d a 1
d b -
d c 0
e a 2
...
This script can function together with the 'matrix' script. For example, if you want to multiply all values in a matrix by 2 on the command line
> cat 468x468.txt | ./unmatrix | awk '{print $1,$2,2*$3}' | ./matrix -nsort -missing -0
You can replace the awk call with your own filter. Here you can remove lines and modify values. The subsequent call to 'matrix' will cast the data in a table format again.
=head1 OPTIONS
=head2 -byrow
This is the default behaviour. See -bycol below.
In other words, -byrow does nothing and is allowed as a command-line flag to remind you of the output format.
=head2 -bycol
By default, the script is row-dominant - the triplet returned is
row col value
When -nsort or -sort are used (see below), for each sorted row value the columns are sorted.
For column-dominant behaviour, use -bycol. This will produce
col row value
and if you asked for sorting, for each sorted column value the rows will be sorted.
=head2 -nsort
Sort row and column names numerically.
=head2 -sort
Sort row and column names asciibetically.
=head2 -sym
If your matrix is symmetric, using this flag will exclude all entries above the diagonal.
      a b c
    a   x x
    b     x      x - not reported
    c
A sanity check is done that the matrix is indeed symmetric (for every cell x,y=z there exists y,x=z). If it is not, the script will quit with an error.
This feature applies only to matrixes whose row and column names are the same.
-sym can be combined with -sort/-nsort
=head2 -delim
Using -delim STRING sets the field separator.
By default this is general whitespace, which should be correct for most cases (space separated, tab separated). To specify a tab delimiter explicitly, use -delim "tab" or the CTRL-V bash trick.
=cut
our $VERSION = "0.11.1";
=pod
=head1 HISTORY
=over
=item 16 Jun 2014
Fixed -sym bug.
=item 10 Jun 2014
Added -sym.
=item 3 Jun 2014
Added -byrow, -bycol, -nsort and -sort.
=item 6 May 2014
Started.
=back
=head1 BUGS
=head1 AUTHOR
Martin Krzywinski
=head1 CONTACT
Martin Krzywinski
Genome Sciences Centre
Vancouver BC Canada
www.bcgsc.ca
martink@bcgsc.ca
=cut
use strict;
use Config::General;
use Data::Dumper;
use FindBin;
use File::Basename;
use Math::VecStat qw(min max average sum minabs maxabs);
use Getopt::Long;
use Pod::Usage;
use lib "$FindBin::RealBin";
use lib "$FindBin::RealBin/../lib";
use lib "$FindBin::RealBin/lib";
use vars qw(%OPT %CONF);
use Statistics::Descriptive;
use Math::Round;
# Parse the command line into %OPT.
#
# -delim and -configfile take a string argument ("=s"). Declared as a
# plain flag, -delim would be stored as 1 and its value (e.g. ",")
# would be left in @ARGV, where it is later taken as the input file name.
GetOptions(\%OPT,
           "man",
           "bycol",
           "byrow",
           "nsort",
           "sort",
           "sym",
           "delim=s",
           "configfile=s",
           "help",
           "version",
           "debug+");

# -version: print the version string and stop.
if($OPT{version}) {
  printinfo($VERSION);
  exit;
}

# -help prints the short usage, -man the full POD.
pod2usage() if $OPT{help};
pod2usage(-verbose=>2) if $OPT{man};

# Build %CONF: configuration file first (if any is found), then the
# command-line options on top of it, then defaults/normalization.
loadconfiguration($OPT{configfile});
populateconfiguration(); # copy command line options to config hash
validateconfiguration();

# With -debug given at least twice, dump the effective configuration.
if(($CONF{debug} || 0) > 1) {
  $Data::Dumper::Pad = "debug parameters";
  $Data::Dumper::Indent = 1;
  $Data::Dumper::Quotekeys = 0;
  $Data::Dumper::Terse = 1;
  print Dumper(\%CONF);
}
# Select the input stream: the file named by the first remaining
# command-line argument, or STDIN when no file is given.
my $inputhandle;
if(my $file = $ARGV[0]) {
  die "No such file $file" unless -e $file;
  # 3-arg open with a lexical handle; fail loudly if the file cannot be read
  open(my $fh, "<", $file) or die "Cannot open file $file: $!";
  $inputhandle = $fh;
} else {
  $inputhandle = \*STDIN;
}

my @header;              # fields of the first data line (corner cell + column names)
my $data = {};           # $data->{outer}{inner} = cell value
my (@rownames,%seenrow); # row names in order of first appearance

# read all lines
while(<$inputhandle>) {
  chomp;
  next if /^\s*#/;   # comment line
  next unless /\S/;  # blank line - skip instead of failing the column count check
  my @cols = split($CONF{delim},$_);
  if(! @header) {
    @header = @cols;
    next;
  }
  die "Found lines with different number of columns" unless @cols == @header;
  push @rownames, $cols[0] unless $seenrow{$cols[0]}++;
  for my $i (1..@cols-1) {
    if($CONF{bycol}) {
      $data->{$header[$i]}{$cols[0]} = $cols[$i];
    } else {
      $data->{$cols[0]}{$header[$i]} = $cols[$i];
    }
  }
}

# Column names in header order, without the corner cell and without repeats.
my (@colnames,%seencol);
for my $name (@header[1..$#header]) {
  push @colnames, $name unless $seencol{$name}++;
}

# Outer/inner key order of $data. Hash key order is arbitrary and differs
# from hash to hash, so the input order is used as the base order; sortmat()
# re-orders it only when -sort or -nsort was requested.
my ($outer,$inner) = $CONF{bycol} ? (\@colnames,\@rownames) : (\@rownames,\@colnames);
my @rows = sortmat(grep { exists $data->{$_} } @$outer);

if($CONF{sym}) {
  # Position of every name in the output order. Both sides of the
  # "on or below the diagonal" test use this single ordering.
  my %pos;
  @pos{@rows} = (0..$#rows);
  my $report = {};
  for my $row (@rows) {
    my @cols = keys %{$data->{$row}};
    if(@rows != @cols) {
      die sprintf("You used the -sym flag but the matrix is not square [%d x %d].",scalar(@rows),scalar(@cols));
    }
    for my $col (@cols) {
      my $rc = $data->{$row}{$col};
      # exists() guard: do not autovivify $data->{$col} while only looking
      my $cr = exists $data->{$col} ? $data->{$col}{$row} : undef;
      if(defined $rc && defined $cr && $rc eq $cr) {
        # keep the diagonal and everything below it
        $report->{$row}{$col} = $rc if $pos{$col} <= $pos{$row};
      } else {
        my @msg = ("You used -sym but the matrix is not symmetric.");
        if(! defined $cr) {
          push @msg, "The entry [$row,$col] exists, but not [$col,$row].";
        } elsif (! defined $rc) {
          push @msg, "The entry [$col,$row] exists, but not [$row,$col].";
        } else {
          push @msg, sprintf("The entry [%s,%s]=%s and [%s,%s]=%s do not match.",$row,$col,$rc,$col,$row,$cr);
        }
        # quit with an error: message on STDERR, non-zero exit status
        die join("\n",@msg),"\n";
      }
    }
  }
  $data = $report;
  @rows = grep { exists $data->{$_} } @rows;
}

# Report every remaining cell as one "outer inner value" line.
for my $r (@rows) {
  my @cols = sortmat(grep { exists $data->{$r}{$_} } @$inner);
  for my $c (@cols) {
    printinfo($r,$c,$data->{$r}{$c});
  }
}
# Order a list of row/column names according to the sort options.
#
#   $CONF{nsort} true  - numeric ascending order (takes precedence)
#   $CONF{sort}  true  - default string order
#   neither            - the list is returned in the order it was given
#
# Takes the names as a list and returns the (possibly re-ordered) list.
sub sortmat {
  my @names = @_;
  return sort { $a <=> $b } @names if $CONF{nsort};
  return sort @names               if $CONF{sort};
  return @names;
}
# Fill in defaults and normalize values in %CONF.
#
# - delim: falls back to a single space when unset (or otherwise false)
# - delim: the word "tab" (any letter case) is replaced by a tab character
sub validateconfiguration {
  $CONF{delim} = " " unless $CONF{delim};
  if($CONF{delim} =~ /^tab$/i) {
    $CONF{delim} = "\t";
  }
}
################################################################
#
# *** Do NOT EDIT BELOW THIS LINE ***
#
################################################################
################################################################
################################################################
################################################################
# Copy every command-line option from %OPT into %CONF. An option given
# on the command line replaces a %CONF entry of the same name; %CONF
# entries without a matching option are left alone.
sub populateconfiguration {
  $CONF{$_} = $OPT{$_} for keys %OPT;
}
# Locate and load a configuration file into %CONF.
#
# Argument: path of a configuration file (may be undef).
#
# The first existing, readable file from this list is used:
#   1. the path passed as argument
#   2. /home/$LOGNAME/.SCRIPTNAME.conf
#   3. $HOME/.SCRIPTNAME.conf          (home directories outside /home)
#   4. SCRIPTDIR/SCRIPTNAME.conf
#   5. SCRIPTDIR/etc/SCRIPTNAME.conf
#
# When none is found, returns undef and leaves %OPT and %CONF untouched.
# Otherwise records the chosen path in $OPT{configfile} and replaces the
# content of %CONF with the parsed configuration.
sub loadconfiguration {
  my $file = shift;
  my ($scriptname) = fileparse($0);

  # Candidates that depend on an undefined value are left out, so no
  # file test is run on undef or on a path built from a missing
  # environment variable.
  my @candidates = grep { defined $_ && length $_ } (
    $file,
    (defined $ENV{LOGNAME} ? "/home/$ENV{LOGNAME}/.$scriptname.conf" : ()),
    (defined $ENV{HOME}    ? "$ENV{HOME}/.$scriptname.conf"          : ()),
    "$FindBin::RealBin/$scriptname.conf",
    "$FindBin::RealBin/etc/$scriptname.conf",
  );

  # -r _ reuses the stat data fetched by the preceding -e
  my ($found) = grep { -e $_ && -r _ } @candidates;
  return undef unless defined $found;

  $OPT{configfile} = $found;
  # direct method call instead of the indirect "new Config::General(...)"
  my $conf = Config::General->new(-ConfigFile=>$found,
                                  -AllowMultiOptions=>"yes",
                                  -LowerCaseNames=>1,
                                  -AutoTrue=>1);
  %CONF = $conf->getall;
}
# Print a message through printinfo(), prefixed with the word "debug",
# but only when $CONF{debug} is set. Does nothing otherwise.
sub printdebug {
  return unless $CONF{debug};
  printinfo("debug",@_);
}
# Print all arguments on one line of STDOUT, separated by single
# spaces and terminated by a newline. With no arguments an empty line
# is printed.
sub printinfo {
  my $line = join(" ",@_);
  print $line,"\n";
}
|