/usr/bin/vcf_melt is in pyvcf 0.6.8-1ubuntu4.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/python3
""" Melt a VCF file into a tab delimited set of calls, one per line
VCF files have all the calls from different samples on one line. This
script reads VCF from the file named as the first command-line argument
(or from stdin when no argument is given) and writes all calls to stdout
in tab-delimited format, with one call in one sample per line. This makes
it easy to find a given sample's genotype with, say, grep.
"""
import sys
import csv
import vcf
# All output rows are written tab-separated to stdout.
out = csv.writer(sys.stdout, delimiter='\t')

# Read from the file named by the first argument, otherwise from stdin.
# `file()` was a Python 2 builtin and raises NameError under the python3
# interpreter this script runs with, so use open() instead.
# The handle is intentionally left open here: the reader built on it is
# iterated later in the script.
if len(sys.argv) > 1:
    inp = open(sys.argv[1])
else:
    inp = sys.stdin
reader = vcf.VCFReader(inp)

# Materialise the key collections as lists. In Python 3 a mapping's
# .keys() returns a view object, which cannot be concatenated to a list
# with `+` (TypeError) when building the header below.
formats = list(reader.formats.keys())
infos = list(reader.infos.keys())

# Column order: sample name, one column per FORMAT key, the fixed
# per-record fields, then one "info."-prefixed column per INFO key.
header = (
    ["SAMPLE"]
    + formats
    + ['FILTER', 'CHROM', 'POS', 'REF', 'ALT', 'ID']
    + ['info.' + x for x in infos]
)
out.writerow(header)
def flatten(x):
    """Collapse a list into a single comma-separated string.

    Each element is converted with ``str`` before joining, so an empty
    list becomes the empty string. Any value that is not a list
    (including ``None``) is returned unchanged.
    """
    # isinstance rather than an exact type comparison, so that list
    # subclasses are flattened as well.
    if isinstance(x, list):
        return ','.join(map(str, x))
    return x
# Emit one output row for every (record, sample) pair.
for record in reader:
    # These columns are identical for every sample of the record.
    info_columns = [flatten(record.INFO.get(key, None)) for key in infos]
    site_columns = [record.CHROM, record.POS, record.REF, record.ALT, record.ID]
    for call in record.samples:
        # A FORMAT field missing from this call yields None, which ends
        # up as a blank cell in the output.
        format_columns = [
            flatten(getattr(call.data, key, None)) for key in formats
        ]
        out.writerow(
            [call.sample]
            + format_columns
            + [record.FILTER or '.']
            + site_columns
            + info_columns
        )
|