/usr/share/pyshared/Bio/AlignIO/NexusIO.py is in python-biopython 1.58-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 | # Copyright 2008-2010 by Peter Cock. All rights reserved.
#
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""
Bio.AlignIO support for the "nexus" file format.
You are expected to use this module via the Bio.AlignIO functions (or the
Bio.SeqIO functions if you want to work directly with the gapped sequences).
See also the Bio.Nexus module (which this code calls internally),
as this offers more than just accessing the alignment or its
sequences as SeqRecord objects.
"""
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Nexus import Nexus
from Bio.Align import MultipleSeqAlignment
from Interfaces import AlignmentWriter
from Bio import Alphabet
#You can get a couple of example files here:
#http://www.molecularevolution.org/resources/fileformats/
#This is a generator function!
def NexusIterator(handle, seq_count=None):
"""Returns SeqRecord objects from a Nexus file.
Thus uses the Bio.Nexus module to do the hard work.
You are expected to call this function via Bio.SeqIO or Bio.AlignIO
(and not use it directly).
NOTE - We only expect ONE alignment matrix per Nexus file,
meaning this iterator will only yield one MultipleSeqAlignment.
"""
n = Nexus.Nexus(handle)
if not n.matrix:
#No alignment found
raise StopIteration
#Bio.Nexus deals with duplicated names by adding a '.copy' suffix.
#The original names and the modified names are kept in these two lists:
assert len(n.unaltered_taxlabels) == len(n.taxlabels)
if seq_count and seq_count != len(n.unaltered_taxlabels):
raise ValueError("Found %i sequences, but seq_count=%i" \
% (len(n.unaltered_taxlabels), seq_count))
#ToDo - Can we extract any annotation too?
records = (SeqRecord(n.matrix[new_name], id=new_name, \
name=old_name, description="") \
for old_name, new_name \
in zip (n.unaltered_taxlabels, n.taxlabels))
#All done
yield MultipleSeqAlignment(records, n.alphabet)
class NexusWriter(AlignmentWriter):
"""Nexus alignment writer.
Note that Nexus files are only expected to hold ONE alignment
matrix.
You are expected to call this class via the Bio.AlignIO.write() or
Bio.SeqIO.write() functions.
"""
def write_file(self, alignments):
"""Use this to write an entire file containing the given alignments.
alignments - A list or iterator returning MultipleSeqAlignment objects.
This should hold ONE and only one alignment.
"""
align_iter = iter(alignments) #Could have been a list
try:
first_alignment = align_iter.next()
except StopIteration:
first_alignment = None
if first_alignment is None:
#Nothing to write!
return 0
#Check there is only one alignment...
try:
second_alignment = align_iter.next()
except StopIteration:
second_alignment = None
if second_alignment is not None:
raise ValueError("We can only write one Alignment to a Nexus file.")
#Good. Actually write the single alignment,
self.write_alignment(first_alignment)
return 1 #we only support writing one alignment!
def write_alignment(self, alignment):
#Creates an empty Nexus object, adds the sequences,
#and then gets Nexus to prepare the output.
if len(alignment) == 0:
raise ValueError("Must have at least one sequence")
if alignment.get_alignment_length() == 0:
raise ValueError("Non-empty sequences are required")
minimal_record = "#NEXUS\nbegin data; dimensions ntax=0 nchar=0; " \
+ "format datatype=%s; end;" \
% self._classify_alphabet_for_nexus(alignment._alphabet)
n = Nexus.Nexus(minimal_record)
n.alphabet = alignment._alphabet
for record in alignment:
n.add_sequence(record.id, record.seq.tostring())
n.write_nexus_data(self.handle)
def _classify_alphabet_for_nexus(self, alphabet):
"""Returns 'protein', 'dna', 'rna' based on the alphabet (PRIVATE).
Raises an exception if this is not possible."""
#Get the base alphabet (underneath any Gapped or StopCodon encoding)
a = Alphabet._get_base_alphabet(alphabet)
if not isinstance(a, Alphabet.Alphabet):
raise TypeError("Invalid alphabet")
elif isinstance(a, Alphabet.ProteinAlphabet):
return "protein"
elif isinstance(a, Alphabet.DNAAlphabet):
return "dna"
elif isinstance(a, Alphabet.RNAAlphabet):
return "rna"
else:
#Must be something like NucleotideAlphabet or
#just the generic Alphabet (default for fasta files)
raise ValueError("Need a DNA, RNA or Protein alphabet")
if __name__ == "__main__":
from StringIO import StringIO
print "Quick self test"
print
print "Repeated names without a TAXA block"
handle = StringIO("""#NEXUS
[TITLE: NoName]
begin data;
dimensions ntax=4 nchar=50;
format interleave datatype=protein gap=- symbols="FSTNKEYVQMCLAWPHDRIG";
matrix
CYS1_DICDI -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ----
ALEU_HORVU MAHARVLLLA LAVLATAAVA VASSSSFADS NPIRPVTDRA ASTLESAVLG
CATH_HUMAN ------MWAT LPLLCAGAWL LGV------- -PVCGAAELS VNSLEK----
CYS1_DICDI -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---X
;
end;
""")
for a in NexusIterator(handle):
print a
for r in a:
print repr(r.seq), r.name, r.id
print "Done"
print
print "Repeated names with a TAXA block"
handle = StringIO("""#NEXUS
[TITLE: NoName]
begin taxa
CYS1_DICDI
ALEU_HORVU
CATH_HUMAN
CYS1_DICDI;
end;
begin data;
dimensions ntax=4 nchar=50;
format interleave datatype=protein gap=- symbols="FSTNKEYVQMCLAWPHDRIG";
matrix
CYS1_DICDI -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ----
ALEU_HORVU MAHARVLLLA LAVLATAAVA VASSSSFADS NPIRPVTDRA ASTLESAVLG
CATH_HUMAN ------MWAT LPLLCAGAWL LGV------- -PVCGAAELS VNSLEK----
CYS1_DICDI -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---X
;
end;
""")
for a in NexusIterator(handle):
print a
for r in a:
print repr(r.seq), r.name, r.id
print "Done"
print
print "Reading an empty file"
assert 0 == len(list(NexusIterator(StringIO())))
print "Done"
print
print "Writing..."
handle = StringIO()
NexusWriter(handle).write_file([a])
handle.seek(0)
print handle.read()
handle = StringIO()
try:
NexusWriter(handle).write_file([a,a])
assert False, "Should have rejected more than one alignment!"
except ValueError:
pass
|