/usr/bin/nucmer is in mummer 3.23~dfsg-2.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/perl
#-------------------------------------------------------------------------------
# Programmer: Adam M Phillippy, The Institute for Genomic Research
# File: nucmer
# Date: 04 / 09 / 03
#
# Usage:
# nucmer [options] <Reference> <Query>
#
# Try 'nucmer -h' for more information.
#
# Purpose: To create alignments between two multi-FASTA inputs by using
# the MUMmer matching and clustering algorithms.
#
#-------------------------------------------------------------------------------
use lib "/usr/share/perl5/";
use Foundation;
use File::Spec::Functions;
use strict;
#-- Directories the script reads its helper programs and Perl modules from
my $AUX_BIN_DIR = '/usr/lib/mummer';
my $BIN_DIR     = '/usr/bin';
my $SCRIPT_DIR  = '/usr/share/perl5/';
#-- Version banner handed to TIGR::Foundation with setVersionInfo; the text
#   is wrapped in a leading and a trailing newline
my $VERSION_INFO = "\n" . 'NUCmer (NUCleotide MUMmer) version 3.1' . "\n";
#-- Full help text handed to TIGR::Foundation with setHelpInfo. The text is a
#   non-interpolating q~ ~ literal, so it must never contain a '~' character.
my $HELP_INFO = q~
USAGE: nucmer [options] <Reference> <Query>
DESCRIPTION:
nucmer generates nucleotide alignments between two multi-FASTA input
files. The out.delta output file lists the distance between insertions
and deletions that produce maximal scoring alignments between each
sequence. The show-* utilities know how to read this format.
MANDATORY:
Reference Set the input reference multi-FASTA filename
Query Set the input query multi-FASTA filename
OPTIONS:
--mum Use anchor matches that are unique in both the reference
and query
--mumcand Same as --mumreference
--mumreference Use anchor matches that are unique in the reference
but not necessarily unique in the query (default behavior)
--maxmatch Use all anchor matches regardless of their uniqueness
-b|breaklen Set the distance an alignment extension will attempt to
extend poor scoring regions before giving up (default 200)
--[no]banded Enforce absolute banding of dynamic programming matrix
based on diagdiff parameter EXPERIMENTAL (default no)
-c|mincluster Sets the minimum length of a cluster of matches (default 65)
--[no]delta Toggle the creation of the delta file (default --delta)
--depend Print the dependency information and exit
-D|diagdiff Set the maximum diagonal difference between two adjacent
anchors in a cluster (default 5)
-d|diagfactor Set the maximum diagonal difference between two adjacent
anchors in a cluster as a differential fraction of the gap
length (default 0.12)
--[no]extend Toggle the cluster extension step (default --extend)
-f
--forward Use only the forward strand of the Query sequences
-g|maxgap Set the maximum gap between two adjacent matches in a
cluster (default 90)
-h
--help Display help information and exit
-l|minmatch Set the minimum length of a single match (default 20)
-o
--coords Automatically generate the original NUCmer1.1 coords
output file using the 'show-coords' program
--[no]optimize Toggle alignment score optimization, i.e. if an alignment
extension reaches the end of a sequence, it will backtrack
to optimize the alignment score instead of terminating the
alignment at the end of the sequence (default --optimize)
-p|prefix Set the prefix of the output files (default "out")
-r
--reverse Use only the reverse complement of the Query sequences
--[no]simplify Simplify alignments by removing shadowed clusters. Turn
this option off if aligning a sequence to itself to look
for repeats (default --simplify)
-V
--version Display the version information and exit
~;
#-- One-line synopsis handed to TIGR::Foundation with setUsageInfo; the text
#   is wrapped in a leading and a trailing newline
my $USAGE_INFO = join ("",
                       "\n",
                       'USAGE: nucmer [options] <Reference> <Query>',
                       "\n");
#-- Files this script depends on, handed to TIGR::Foundation with
#   addDependInfo. The paths are joined with catfile (imported from
#   File::Spec::Functions) rather than by string interpolation, because
#   $SCRIPT_DIR ends in a slash and "$SCRIPT_DIR/Foundation.pm" would
#   otherwise contain a doubled "//".
my @DEPEND_INFO =
    (
     catfile ($BIN_DIR, "mummer"),
     catfile ($BIN_DIR, "mgaps"),
     catfile ($BIN_DIR, "show-coords"),
     catfile ($AUX_BIN_DIR, "postnuc"),
     catfile ($AUX_BIN_DIR, "prenuc"),
     catfile ($SCRIPT_DIR, "Foundation.pm")
    );
#-- Built-in default for each tunable setting. Every value is deliberately
#   kept as a string: the values are pasted verbatim into command lines, and
#   a numeric literal such as .12 would be re-stringified differently.
my %DEFAULT_PARAMETERS =
    (
     OUTPUT_PREFIX   => 'out',            # prefix shared by all output files
     MATCH_ALGORITHM => '-mumreference',  # switch selecting the match finder
     MATCH_DIRECTION => '-b',             # switch selecting the strand(s)
     MIN_MATCH       => '20',             # shortest single match
     MAX_GAP         => '90',             # widest gap between matches
     MIN_CLUSTER     => '65',             # smallest cluster
     DIAG_DIFF       => '5',              # absolute diagonal difference
     DIAG_FACTOR     => '.12',            # fractional diagonal difference
     BREAK_LEN       => '200',            # extension break length
     POST_SWITCHES   => '',               # extra post-processing switches
    );
#-- main: run the NUCmer pipeline.
#
#   Steps, in order: parse the command line, verify that the helper programs
#   and the input/output files are usable, run prenuc, pipe mummer into
#   mgaps, run postnuc, optionally run show-coords, then delete the
#   temporary "<prefix>.ntref" and "<prefix>.mgaps" files.
#
#   Input:   options and two positional arguments (<Reference> <Query>)
#            taken from @ARGV.
#   Returns: 0 once every step has completed.
#   Errors:  usage problems print a message and exit (1) directly. A failed
#            file check or pipeline step is reported through $tigr->bail,
#            a TIGR::Foundation method not visible here -- presumably it
#            terminates the script; TODO confirm.
#
#   Every user supplied path is single-quoted before it is placed in a shell
#   command line, so names containing whitespace or shell metacharacters
#   reach the helper programs unchanged.
sub main ( )
{
    my $tigr;                 # TIGR::Foundation object
    my $ref_file;             # absolute path of the reference input file
    my $qry_file;             # absolute path of the query input file

    #-- The command line options for the various programs
    my $pfx   = $DEFAULT_PARAMETERS { "OUTPUT_PREFIX" };
    my $algo  = $DEFAULT_PARAMETERS { "MATCH_ALGORITHM" };
    my $mdir  = $DEFAULT_PARAMETERS { "MATCH_DIRECTION" };
    my $size  = $DEFAULT_PARAMETERS { "MIN_MATCH" };
    my $gap   = $DEFAULT_PARAMETERS { "MAX_GAP" };
    my $clus  = $DEFAULT_PARAMETERS { "MIN_CLUSTER" };
    my $ddiff = $DEFAULT_PARAMETERS { "DIAG_DIFF" };
    my $dfrac = $DEFAULT_PARAMETERS { "DIAG_FACTOR" };
    my $blen  = $DEFAULT_PARAMETERS { "BREAK_LEN" };
    my $psw   = $DEFAULT_PARAMETERS { "POST_SWITCHES" };

    my $fwd;                  # if true, use forward strand
    my $rev;                  # if true, use reverse strand
    my $maxmatch;             # matching algorithm switches
    my $mumreference;
    my $mum;
    my $banded   = 0;         # if true, enforce absolute dp banding
    my $extend   = 1;         # if true, extend clusters
    my $delta    = 1;         # if true, create the delta file
    my $optimize = 1;         # if true, optimize alignment scores
    my $simplify = 1;         # if true, simplify shadowed alignments
    my $generate_coords;      # if true, run show-coords at the end

    #-- Wrap a string in single quotes for the shell; an embedded single
    #   quote is written as '\'' (close quote, escaped quote, reopen quote)
    my $shell_quote = sub {
        my ($text) = @_;
        $text =~ s/'/'\\''/g;
        return "'$text'";
    };

    #-- Initialize TIGR::Foundation
    $tigr = TIGR::Foundation->new;
    if ( !defined ($tigr) ) {
        print (STDERR "ERROR: TIGR::Foundation could not be initialized\n");
        exit (1);
    }

    #-- Set help and usage information
    $tigr->setHelpInfo ($HELP_INFO);
    $tigr->setUsageInfo ($USAGE_INFO);
    $tigr->setVersionInfo ($VERSION_INFO);
    $tigr->addDependInfo (@DEPEND_INFO);

    #-- Get command line parameters
    my $parsed_ok = $tigr->TIGR_GetOptions
        (
         "maxmatch"       => \$maxmatch,
         "mumcand"        => \$mumreference,
         "mumreference"   => \$mumreference,
         "mum"            => \$mum,
         "b|breaklen=i"   => \$blen,
         "banded!"        => \$banded,
         "c|mincluster=i" => \$clus,
         "delta!"         => \$delta,
         "D|diagdiff=i"   => \$ddiff,
         "d|diagfactor=f" => \$dfrac,
         "extend!"        => \$extend,
         "f|forward"      => \$fwd,
         "g|maxgap=i"     => \$gap,
         "l|minmatch=i"   => \$size,
         "o|coords"       => \$generate_coords,
         "optimize!"      => \$optimize,
         "p|prefix=s"     => \$pfx,
         "r|reverse"      => \$rev,
         "simplify!"      => \$simplify
        );

    #-- Parsing must succeed and exactly two positional arguments must remain
    if ( !$parsed_ok || scalar (@ARGV) != 2 ) {
        $tigr->printUsageInfo( );
        print (STDERR "Try '$0 -h' for more information.\n");
        exit (1);
    }

    $ref_file = File::Spec->rel2abs ($ARGV[0]);
    $qry_file = File::Spec->rel2abs ($ARGV[1]);

    #-- Set up the program parameters; with neither -f nor -r the default
    #   match direction is left untouched
    if ( $fwd && $rev ) {
        $mdir = "-b";
    } elsif ( $fwd ) {
        $mdir = "";
    } elsif ( $rev ) {
        $mdir = "-r";
    }

    #-- Each disabled feature adds one switch to the postnuc command line
    if ( ! $extend ) {
        $psw .= "-e ";
    }
    if ( ! $delta ) {
        $psw .= "-d ";
    }
    if ( ! $optimize ) {
        $psw .= "-t ";
    }
    if ( ! $simplify ) {
        $psw .= "-s ";
    }

    #-- At most one matching algorithm may be requested
    my $algo_requests = 0;
    if ( $mum ) {
        $algo_requests ++;
        $algo = "-mum";
    }
    if ( $mumreference ) {
        $algo_requests ++;
        $algo = "-mumreference";
    }
    if ( $maxmatch ) {
        $algo_requests ++;
        $algo = "-maxmatch";
    }
    if ( $algo_requests > 1 ) {
        $tigr->printUsageInfo( );
        print (STDERR "ERROR: Multiple matching algorithms selected\n");
        print (STDERR "Try '$0 -h' for more information.\n");
        exit (1);
    }

    #-- Set up the program path names
    my $algo_path       = "$BIN_DIR/mummer";
    my $mgaps_path      = "$BIN_DIR/mgaps";
    my $prenuc_path     = "$AUX_BIN_DIR/prenuc";
    my $postnuc_path    = "$AUX_BIN_DIR/postnuc";
    my $showcoords_path = "$BIN_DIR/show-coords";

    #-- Check that the files needed are all there and readable/writable
    {
        my @unusable;         # every path that failed its check, in order

        foreach my $program ( $algo_path, $mgaps_path,
                              $prenuc_path, $postnuc_path ) {
            if ( !$tigr->isExecutableFile ($program) ) {
                push (@unusable, $program);
            }
        }

        foreach my $input ( $ref_file, $qry_file ) {
            if ( !$tigr->isReadableFile ($input) ) {
                push (@unusable, $input);
            }
        }

        #-- An output file is rejected only if it is neither creatable
        #   nor writable
        foreach my $output ( "$pfx.ntref", "$pfx.mgaps", "$pfx.delta" ) {
            if ( !$tigr->isCreatableFile ($output)
                 && !$tigr->isWritableFile ($output) ) {
                push (@unusable, $output);
            }
        }

        if ( $generate_coords ) {
            if ( !$tigr->isExecutableFile ($showcoords_path) ) {
                push (@unusable, $showcoords_path);
            }
            if ( !$tigr->isCreatableFile ("$pfx.coords")
                 && !$tigr->isWritableFile ("$pfx.coords") ) {
                push (@unusable, "$pfx.coords");
            }
        }

        #-- If 1 or more files could not be processed, terminate script
        if ( @unusable ) {
            $tigr->logError
                ("ERROR: The following critical files could not be used", 1);
            #-- Report most recently recorded first
            foreach my $path ( reverse (@unusable) ) {
                $tigr->logError ($path, 1);
            }
            $tigr->logError
                ("Check your paths and file permissions and try again", 1);
            $tigr->bail( );
        }
    }

    #-- Shell-safe spellings of every user supplied path
    my $ref_arg    = $shell_quote->($ref_file);
    my $qry_arg    = $shell_quote->($qry_file);
    my $pfx_arg    = $shell_quote->($pfx);
    my $ntref_arg  = $shell_quote->("$pfx.ntref");
    my $mgaps_arg  = $shell_quote->("$pfx.mgaps");
    my $delta_arg  = $shell_quote->("$pfx.delta");
    my $coords_arg = $shell_quote->("$pfx.coords");

    my $status;               # return value of the most recent runCommand

    #-- Run prenuc and assert return value is zero
    print (STDERR "1: PREPARING DATA\n");
    $status = $tigr->runCommand
        ("$prenuc_path $ref_arg > $ntref_arg");
    if ( $status != 0 ) {
        $tigr->bail
            ("ERROR: prenuc returned non-zero\n");
    }

    #-- Run mummer | mgaps and assert return value is zero
    print (STDERR "2,3: RUNNING mummer AND CREATING CLUSTERS\n");
    open (my $algo_pipe, "-|",
          "$algo_path $algo $mdir -l $size -n $ntref_arg $qry_arg")
        or $tigr->bail ("ERROR: could not open $algo_path output pipe $!");
    open (my $clus_pipe, "|-",
          "$mgaps_path -l $clus -s $gap -d $ddiff -f $dfrac > $mgaps_arg")
        or $tigr->bail ("ERROR: could not open $mgaps_path input pipe $!");

    #-- Copy the mummer output into mgaps one line at a time
    while ( defined (my $line = <$algo_pipe>) ) {
        print {$clus_pipe} $line
            or $tigr->bail ("ERROR: could not write to $mgaps_path pipe $!");
    }

    #-- close on a pipe is false when the child exited non-zero
    my $algo_closed = close ($algo_pipe);
    my $clus_closed = close ($clus_pipe);
    if ( !$algo_closed || !$clus_closed ) {
        $tigr->bail ("ERROR: mummer and/or mgaps returned non-zero\n");
    }

    #-- Run postnuc and assert return value is zero
    print (STDERR "4: FINISHING DATA\n");
    if ( $banded )
    {
        $status = $tigr->runCommand
            ("$postnuc_path $psw -b $blen -B $ddiff $ref_arg $qry_arg $pfx_arg < $mgaps_arg");
    }
    else
    {
        $status = $tigr->runCommand
            ("$postnuc_path $psw -b $blen $ref_arg $qry_arg $pfx_arg < $mgaps_arg");
    }
    if ( $status != 0 ) {
        $tigr->bail ("ERROR: postnuc returned non-zero\n");
    }

    #-- If the -o flag was set, run show-coords using NUCmer1.1 settings
    if ( $generate_coords ) {
        print (STDERR "5: GENERATING COORDS FILE\n");
        $status = $tigr->runCommand
            ("$showcoords_path -r $delta_arg > $coords_arg");
        if ( $status != 0 ) {
            $tigr->bail ("ERROR: show-coords returned non-zero\n");
        }
    }

    #-- Remove the temporary output; unlink returns the number of files
    #   it deleted, so anything other than 2 means a leftover file
    my $removed = unlink ("$pfx.ntref", "$pfx.mgaps");
    if ( $removed != 2 ) {
        $tigr->logError ("WARNING: there was a problem deleting".
                         " the temporary output files", 1);
    }

    #-- Return success
    return (0);
}
#-- Run the pipeline and use main's return value as the process exit status
my $exit_status = main ( );
exit ($exit_status);
#-- END OF SCRIPT
|