/usr/bin/maf-swap is in last-align 393-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 | #! /usr/bin/env python
# Read MAF-format alignments, and write them, after moving the Nth
# sequence to the top in each alignment.
# Before writing, if the top sequence would be on the - strand, then
# flip all the strands. But don't do this if the top sequence is
# translated DNA.
# Seems to work with Python 2.x, x>=4
import fileinput, itertools, optparse, os, signal, string, sys
def filterComments(lines):
for i in lines:
if i.startswith("#"): print i,
else: yield i
def mafInput(lines):
for k, v in itertools.groupby(lines, str.isspace):
if not k: yield list(v)
def indexOfNthSequence(mafLines, n):
for i, line in enumerate(mafLines):
if line.startswith("s"):
if n == 1: return i
n -= 1
raise Exception("encountered an alignment with too few sequences")
def rangeOfNthSequence(mafLines, n):
"""Get the range of lines associated with the Nth sequence."""
start = indexOfNthSequence(mafLines, n)
stop = start + 1
while stop < len(mafLines):
line = mafLines[stop]
if not (line.startswith("q") or line.startswith("i")): break
stop += 1
return start, stop
complement = string.maketrans('ACGTNSWRYKMBDHVacgtnswrykmbdhv',
'TGCANSWYRMKVHDBtgcanswyrmkvhdb')
# doesn't handle "U" in RNA sequences
def revcomp(seq):
return seq[::-1].translate(complement)
def flippedMafS(words):
alnStart = int(words[2])
alnSize = int(words[3])
strand = words[4]
seqSize = int(words[5])
alnString = words[6]
newStart = seqSize - alnStart - alnSize
if strand == "-": newStrand = "+"
else: newStrand = "-"
newString = revcomp(alnString)
out = words[0], words[1], newStart, alnSize, newStrand, seqSize, newString
return map(str, out)
def flippedMafP(words):
flippedString = words[1][::-1]
return words[:1] + [flippedString]
def flippedMafQ(words):
qualityString = words[2]
flippedString = qualityString[::-1]
return words[:2] + [flippedString]
def flippedMafLine(mafLine):
words = mafLine.split()
if words[0] == "s": return flippedMafS(words)
elif words[0] == "p": return flippedMafP(words)
elif words[0] == "q": return flippedMafQ(words)
else: return words
def maxlen(s):
return max(map(len, s))
def sLineFieldWidths(mafLines):
sLines = (i for i in mafLines if i[0] == "s")
sColumns = zip(*sLines)
return map(maxlen, sColumns)
def joinedMafS(words, fieldWidths):
formatParams = itertools.chain(*zip(fieldWidths, words))
return "%*s %-*s %*s %*s %*s %*s %*s\n" % tuple(formatParams)
def joinedMafLine(words, fieldWidths):
if words[0] == "s":
return joinedMafS(words, fieldWidths)
elif words[0] == "q":
words = words[:2] + [""] * 4 + words[2:]
return joinedMafS(words, fieldWidths)
elif words[0] == "p":
words = words[:1] + [""] * 5 + words[1:]
return joinedMafS(words, fieldWidths)
else:
return " ".join(words) + "\n"
def flippedMaf(mafLines):
flippedLines = map(flippedMafLine, mafLines)
fieldWidths = sLineFieldWidths(flippedLines)
return (joinedMafLine(i, fieldWidths) for i in flippedLines)
def isCanonicalStrand(mafLine):
words = mafLine.split()
strand = words[4]
if strand == "+": return True
alnString = words[6]
if "/" in alnString or "\\" in alnString: return True # frameshifts
alnSize = int(words[3])
gapCount = alnString.count("-")
if len(alnString) - gapCount < alnSize: return True # translated DNA
return False
def mafSwap(opts, args):
inputLines = fileinput.input(args)
for mafLines in mafInput(filterComments(inputLines)):
start, stop = rangeOfNthSequence(mafLines, opts.n)
mafLines[1:stop] = mafLines[start:stop] + mafLines[1:start]
if not isCanonicalStrand(mafLines[1]):
mafLines = flippedMaf(mafLines)
for i in mafLines: print i,
print # blank line after each alignment
if __name__ == "__main__":
signal.signal(signal.SIGPIPE, signal.SIG_DFL) # avoid silly error message
usage = "%prog [options] my-alignments.maf"
description = "Change the order of sequences in MAF-format alignments."
op = optparse.OptionParser(usage=usage, description=description)
op.add_option("-n", type="int", default=2,
help="move the Nth sequence to the top (default: %default)")
(opts, args) = op.parse_args()
if opts.n < 1: op.error("option -n: should be >= 1")
try: mafSwap(opts, args)
except KeyboardInterrupt: pass # avoid silly error message
except Exception, e:
prog = os.path.basename(sys.argv[0])
sys.exit(prog + ": error: " + str(e))
|