/usr/lib/python2.7/dist-packages/cogent/format/bedgraph.py is in python-cogent 1.9-9.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
from cogent.util.misc import get_merged_by_value_coords
# Module metadata: authorship, licence, version and maintainer contact.
__author__ = "Gavin Huttley"
__copyright__ = "Copyright 2007-2016, The Cogent Project"
__credits__ = ["Gavin Huttley"]
__license__ = "GPL"
__version__ = "1.9"
__maintainer__ = "Gavin Huttley"
__email__ = "gavin.huttley@anu.edu.au"
__status__ = "alpha"
# following from https://cgwb.nci.nih.gov/goldenPath/help/bedgraph.html
# track type=bedGraph name=track_label description=center_label
# visibility=display_mode color=r,g,b altColor=r,g,b
# priority=priority autoScale=on|off alwaysZero=on|off
# gridDefault=on|off maxHeightPixels=max:default:min
# graphType=bar|points viewLimits=lower:upper
# yLineMark=real-value yLineOnOff=on|off
# windowingFunction=maximum|mean|minimum smoothingWindow=off|2-16
# Data Values
# Bedgraph track data values can be integer or real, positive or negative
# values. Chromosome positions are specified as 0-relative. The first
# chromosome position is 0. The last position in a chromosome of length N
# would be N - 1. Only positions specified have data. Positions not
# specified do not have data and will not be graphed. All positions specified
# in the input data must be in numerical order. The bedGraph format has four
# columns of data: chrom, chromStart, chromEnd, dataValue
# Track-line attributes that may appear in a bedGraph header. Keyword
# arguments outside this set are rejected by get_header.
bedgraph_fields = ('name', 'description', 'visibility', 'color', 'altColor',
    'priority', 'autoScale', 'alwaysZero', 'gridDefault', 'maxHeightPixels',
    'graphType', 'viewLimits', 'yLineMark', 'yLineOnOff', 'windowingFunction',
    'smoothingWindow')

# Attributes whose values are normalised to 'on'/'off' before being written.
_booleans = ('autoScale', 'alwaysZero', 'gridDefault', 'yLineOnOff')

# Attributes restricted to a closed set of values (compared as strings).
# list(map(...)) rather than a bare map(...): on Python 3 map returns an
# iterator, which cannot be concatenated onto a list.
valid_values = dict(autoScale=['on', 'off'], graphType=['bar', 'points'],
    windowingFunction=['maximum', 'mean', 'minimum'],
    smoothingWindow=['off'] + list(map(str, range(2, 17))))
def raise_invalid_vals(key, val):
    """Check val against the allowed values recorded for key.

    Returns True when key has no entry in valid_values (nothing to check)
    and None when str(val) is one of the allowed values. Raises
    AssertionError when str(val) is not an allowed value for key.
    """
    allowed = valid_values.get(key)
    if allowed is None:
        # unconstrained attribute
        return True

    if str(val) in allowed:
        return None

    raise AssertionError(
        'Invalid bedgraph key/val pair: got %s=%s; valid values are %s'
        % (key, val, allowed))
def booleans(key, val):
    """Return the UCSC-style flag for val: 'on' or 'off'.

    val counts as 'on' when it equals 1, True, 'on', 'On' or 'ON'; every
    other value maps to 'off'. key is accepted but not used.
    """
    truthy = (1, True, 'on', 'On', 'ON')
    return 'on' if val in truthy else 'off'
def get_header(name=None, description=None, color=None, **kwargs):
    """Build the 'track type=bedGraph ...' header line.

    Arguments:
        - name: track name (required)
        - description: track description (required)
        - color: series of values joined with ',' for the color attribute
          (required)
        - **kwargs: further track attributes; each key must be in
          bedgraph_fields

    Raises AssertionError if name, description or color is None, or if an
    attribute value fails raise_invalid_vals. Raises RuntimeError if kwargs
    holds a key that is not in bedgraph_fields.
    """
    assert None not in (name, description, color)
    rgb = ','.join(map(str, color))
    parts = ['track type=bedGraph name="%s" description="%s" color=%s'
             % (name, description, rgb)]

    if not kwargs:
        return ' '.join(parts)

    not_allowed = set(kwargs) - set(bedgraph_fields)
    if not_allowed:
        raise RuntimeError(
            "incorrect arguments provided to bedgraph %s" %
            str(list(not_allowed)))

    if 'altColor' in kwargs:
        # altColor is given as a series, like color
        kwargs['altColor'] = ','.join(map(str, kwargs['altColor']))

    for key, value in kwargs.items():
        if key in _booleans:
            value = booleans(key, value)

        raise_invalid_vals(key, value)
        parts.append('%s=%s' % (key, value))

    return ' '.join(parts)
def bedgraph(chrom_start_end_val, digits=2, name=None, description=None,
             color=None, **kwargs):
    """returns a bed formatted string. Input data must be provided as
    [(chrom, start, end, val), ...]. These will be merged such that adjacent
    records with the same value will be combined.

    Arguments:
        - chrom_start_end_val: series of (chrom, start, end, val) records;
          records for one chromosome are expected to be consecutive, since
          a change of chrom closes the current group
        - digits: passed to get_merged_by_value_coords (presumably the
          precision used when comparing values -- confirm against that
          function)
        - name: track name
        - description: track description
        - color: (R,G,B) tuple of ints where max val of int is 255, e.g.
          red is (255, 0, 0)
        - **kwargs: keyword=val, .. valid bedgraph format modifiers
          see https://cgwb.nci.nih.gov/goldenPath/help/bedgraph.html

    Returns the header line followed by one tab-delimited
    chrom/start/end/val row per merged span, joined by newlines.
    """
    header = get_header(name=name, description=description,
                        color=color, **kwargs)

    def merged_rows(chrom, spans):
        """returns formatted rows for the merged spans of one chromosome"""
        merged = get_merged_by_value_coords(spans, digits=digits)
        return ['\t'.join(map(str, (chrom, s, e, v))) for s, e, v in merged]

    bedgraph_data = [header]
    data = []
    curr_chrom = None
    for chrom, start, end, val in chrom_start_end_val:
        if data and chrom != curr_chrom:
            # chromosome changed: emit what was collected for the previous one
            bedgraph_data += merged_rows(curr_chrom, data)
            data = []

        curr_chrom = chrom
        # Always keep the current record. Previously the record that
        # triggered the chromosome switch was discarded, losing the first
        # span of every chromosome after the first.
        data.append([start, end, val])

    if data:
        bedgraph_data += merged_rows(curr_chrom, data)

    return '\n'.join(bedgraph_data)
|