This file is indexed.

/usr/lib/python2.7/dist-packages/cogent/draw/codon_usage.py is in python-cogent 1.9-9.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
#!/usr/bin/env python
"""Provides different kinds of codon usage plots.

See individual docstrings for more info.
"""
from matplotlib import use, rc
use('Agg')  #suppress graphical rendering
rc('text', usetex=True)
rc('font', family='serif')  #required to match latex text and equations
from cogent.core.usage import UnsafeCodonUsage as CodonUsage
from cogent.draw.util import scatter_classic, \
   init_graph_display, init_ticks, set_axis_to_probs, \
    broadcast, plot_scatter, plot_filled_contour, \
    plot_contour_lines, standard_series_colors 
from pylab import plot, savefig, gca, text, figlegend

#module metadata: authorship, license and release information
__author__ = "Stephanie Wilson"
__copyright__ = "Copyright 2007-2016, The Cogent Project"
__credits__ = ["Rob Knight", "Stephanie Wilson"]
__license__ = "GPL"
__version__ = "1.9"
__maintainer__ = "Rob Knight"
__email__ = "rob@spot.colorado.edu"
__status__ = "Production"

#module-level constants

#Doublet order historically used for the fingerprint plot (matches
#Sueoka 2002). The entries of color_order were entered in this same order,
#so the two lists have to stay aligned position by position.
doublet_order = [
    'GC', 'CG', 'GG', 'CU',
    'CC', 'UC', 'AC', 'GU',
    'UU', 'CA', 'AU', 'AA',
    'AG', 'GA', 'UA', 'UG',
]
color_order = [
    "#000000", "#FF0000", "#00FF00", "#FFFF00",
    "#CC99FF", "#FFCC99", "#CCFFFF", "#C0C0C0",
    "#6D6D6D", "#2353FF", "#00FFFF", "#FF8800",
    "#238853", "#882353", "#EC008C", "#000099",
]
#pair each doublet with its color so that a given doublet is always drawn
#in the same color
doublets_to_colors = {
    doublet: color for doublet, color in zip(doublet_order, color_order)
}
#amino acid name -> codon block label, so callers only supply the name
aa_labels = dict(
    ALANINE='GCN',
    ARGININE4='CGN',
    GLYCINE='GGN',
    LEUCINE4='CTN',
    PROLINE='CCN',
    SERINE4='TCN',
    THREONINE='ACN',
    VALINE='GTN',
)
#NOTE: this rebinds the standard_series_colors name that is also imported
#from cogent.draw.util at the top of the file
standard_series_colors = list('krgbmc')

#scatterplot functions and helpers
def plot_cai_p3_scatter(data, graph_name='cai_p3_scat.png', **kwargs):
    """Draws a scatter plot of CAI (y axis) against P3 (x axis) and saves it.

    data: flat list of x/y series, i.e. [P3s_1, CAIs_1, P3s_2, CAIs_2, ...]

    graph_name: file name handed to savefig (default 'cai_p3_scat.png').
    The figure is always written; there is no check for None here.

    **kwargs: forwarded unchanged to plot_scatter.
    """
    plot_scatter(
        data,
        graph_shape='sqr',
        graph_grid=None,
        x_label="$P_3$",
        y_label="CAI",
        prob_axes=True,
        **kwargs)
    savefig(graph_name)

def plot_p12_p3(data, graph_name='p12_p3.png', **kwargs):
    """Draws a scatter plot of P12 (y axis) against P3 (x axis) and saves it.

    data: flat list of x/y series, i.e. [P3_1, P12_1, P3_2, P12_2, ...]

    graph_name: file name handed to savefig (default 'p12_p3.png').
    The figure is always written; there is no check for None here.

    **kwargs: forwarded unchanged to plot_scatter (the original notes say
    a regression can optionally be included -- presumably controlled
    through these options; verify against plot_scatter).
    """
    plot_scatter(
        data,
        graph_shape='sqr',
        graph_grid='/',
        x_label="$P_3$",
        y_label="$P_{12}$",
        prob_axes=True,
        **kwargs)
    savefig(graph_name)

def plot_p123_gc(data, graph_name='p123_gc.png', use_p3_as_x=False, **kwargs):
    """Draws a scatter plot of the codon position contents and saves it.

    data: indexable with rows in the order GC, P1, P2, P3.

    graph_name: file name handed to savefig (default 'p123_gc.png').

    use_p3_as_x: if False (the default), P1, P2 and P3 are each plotted
    against GC. If True, P1 and P2 are each plotted against P3.

    **kwargs: forwarded unchanged to plot_scatter.

    Intended colors are p1=blue, p2=green, p3=red.
    NOTE(review): the colors list built below is never handed to
    plot_scatter, so the colors actually used are whatever plot_scatter
    picks by default -- confirm whether it should be passed on.
    """
    #every series shares one x row; pick it and the matching labels
    if not use_p3_as_x:
        series_names = ['$P_1$', '$P_2$', '$P_3$']
        colors = ['b', 'g', 'r']
        x_label = 'GC'
        y_label = '$P_{123}$'
        xy_pairs = [
            data[0], data[1],
            data[0], data[2],
            data[0], data[3],
        ]
    else:
        series_names = ['$P_1$', '$P_2$']
        colors = ['b', 'g']
        x_label = '$P_3$'
        y_label = '$P_{12}$'
        xy_pairs = [
            data[3], data[1],
            data[3], data[2],
        ]

    #draw the points, then write the figure to disk
    plot_scatter(
        xy_pairs,
        graph_grid='/',
        x_label=x_label,
        y_label=y_label,
        series_names=series_names,
        prob_axes=True,
        **kwargs)
    savefig(graph_name)

def plot_fingerprint(data, alpha=0.7, \
    show_legend=True, graph_name='fingerprint.png', has_mean=True,
    which_blocks='quartets', multiple=False, graph_grid='t', prob_axes=True, \
    edge_colors='k', **kwargs):
    """Outputs a bubble plot of four-codon amino acid blocks
    labeled with the colors from Sueoka 2002.

    takes: data:  array-elements in the col order x, y, r of
           each of the four codon Amino Acids in the row order:
           ALA, ARG4, GLY, LEU4, PRO, SER, THR, VAL
           (for traditional fingerprint), or:
           UU -> GG (for 16-block fingerprint).
           last row is the mean (if has_mean is set True)

        **kwargs passed on to init_graph_display (these include 
        graph_shape, graph_grid, x_label, y_label, dark, with_parens).
        NOTE: graph_shape, graph_grid, x_label, y_label and prob_axes are
        already passed explicitly by this function, so they should not be
        repeated in kwargs.
                 
           title: will be printed on graph (default: 'Unknown Species')
           
           num_genes (number of genes contributing to graph: default None)
           NOTE: will not print if None.)
        
           size: of graph in inches (default = 8.0)

           alpha: transparency of bubbles
           (ranges from 0, transparent, to 1, opaque; default 0.7)
           Passed through broadcast() so there is one value per series.
           
           show_legend: bool, default True, whether to print legend

           graph_name: name of file to write (default 'fingerprint.png').
           If None, the figure is drawn but not saved.

           has_mean: whether the data contain the mean (default: True).
           The mean is read from the row just after the block rows.

           which_blocks: which codon blocks to print (default is 'quartets'
           for the 4-codon amino acid blocks, but can also use 'all' for all 
           quartets or 'split' for just the split quartets.)
           Any value other than 'quartets' or 'split' selects all blocks.

           multiple: if False (the default), assumes it got a single block
           of data. Otherwise, assumes multiple blocks of data in a list or
           array.

           graph_grid, prob_axes: handed to init_graph_display
           (defaults 't' and True).

           edge_colors: edge color(s) of the bubbles. Passed through
           broadcast() so there is one entry per series, and applied to
           every plotted series (including the single series used when
           multiple is False). Will iterate over this, so can be a string
           of 1-letter color codes or a list of color names.

    note: that the data are always expected to be in the range (0,1)
    since we're plotting frequencies. axes, grid, etc. are hard-coded
    to these values. 
    """
    #figure out which type of fingerprint plot we're doing, and get the
    #right colors
    if which_blocks == 'quartets':
        blocks = CodonUsage.SingleAABlocks
    elif which_blocks == 'split':
        blocks = CodonUsage.SplitBlocks
    else:
        blocks = CodonUsage.Blocks

    #every block must be a key of doublets_to_colors (KeyError otherwise)
    colors = [doublets_to_colors[i] for i in blocks]
      
    #formatting the labels in latex
    x_label="$G_3/(G_3+C_3)$"
    y_label="$A_3/(A_3+T_3)$"

    #initializing components of the graph
    font,label_font_size=init_graph_display(graph_shape='sqr', \
        graph_grid=graph_grid, x_label=x_label, \
        y_label=y_label, prob_axes=prob_axes, **kwargs)

    #wrap a single data block so the loop below can treat both cases alike
    if not multiple:
        data = [data]
 
    #one alpha and one edge color per series
    #(presumably broadcast repeats a single value len(data) times -- confirm)
    alpha = broadcast(alpha, len(data))
    edge_colors = broadcast(edge_colors, len(data))
  
    for al, d, edge_color in zip(alpha, data, edge_colors):
        #skip this series if no data
        if d is None or not d.any():
            continue
        #one bubble per block: row i of d holds x, y, r for block i
        for i, color in enumerate(colors):
            j = i+1
            #note: doing these as slices because scatter_classic needs the
            #extra level of nesting
            patches = scatter_classic(d[i:j,0], d[i:j,1],
                        s=(d[i:j,2]/2), c=color)
            #set alpha for the patches manually
            for p in patches:
                p.set_alpha(al)
                p.set_edgecolor(edge_color)
        
        #plot mean as its own point -- can't do cross with scatter
        if has_mean:
            mean_index = len(blocks)    #next index after the blocks
            plot([d[mean_index,0]], [d[mean_index,1]],
                 '-k+',markersize=label_font_size, alpha=al)
               

    abbrev = CodonUsage.BlockAbbreviations

    a = gca()
    #if show_legend is True prints a legend in the right center area
    if show_legend:
        legend_key = [abbrev[b] for b in blocks]
        #copy legend font properties from the x axis tick labels
        legend_font_props = \
            a.xaxis.get_label().get_fontproperties().copy()
        legend_font_scale_factor = 0.7
        curr_size = legend_font_props.get_size()
        legend_font_props.set_size(curr_size*legend_font_scale_factor)
        #legend handles are the first len(blocks) patches on the axes
        #(presumably the bubbles of the first plotted series -- confirm)
        l = figlegend(a.patches[:len(blocks)],
                  legend_key,
                  prop=legend_font_props,
                  loc='center right',borderpad=0.1,labelspacing=0.5,
                  handlelength=1.0,handletextpad=0.5, borderaxespad=0.0)
        #fix transparency of patches
        for p in l.get_patches():
            p.set_alpha(1)

    #initialize the ticks
    #NOTE: set_axis_to_probs is called here regardless of prob_axes
    set_axis_to_probs()
    init_ticks(a, label_font_size)
    a.set_xticks([0, 0.5, 1])
    a.set_yticks([0,0.5,1])
    
    #output the figure
    if graph_name is not None:
        savefig(graph_name)

#Contour plots and related functions

def plot_cai_p3_contour(x_bin,y_bin,data,xy_data,
                        graph_name='cai_contour.png',
                        prob_axes=True, **kwargs):
    """Draws a filled contour plot of CAI (y axis) against P3 (x axis).

    x_bin, y_bin, data: bins and data matrix; bundled as a single
    (x_bin, y_bin, data) series for plot_filled_contour.

    xy_data: accepted but not used by this function.

    graph_name: file to write (default 'cai_contour.png'). If None, the
    figure is drawn but not saved.

    prob_axes: handed to plot_filled_contour (default True).

    **kwargs: forwarded unchanged to plot_filled_contour. Earlier notes
    for this function list label, num_genes and size as options --
    verify against plot_filled_contour.
    """
    contour_series = [(x_bin, y_bin, data)]
    plot_filled_contour(
        contour_series,
        graph_grid='/',
        x_label="$P_3$",
        y_label="CAI",
        prob_axes=prob_axes,
        **kwargs)
    #axes are reset after drawing, regardless of prob_axes
    set_axis_to_probs()
    if graph_name is None:
        return
    savefig(graph_name)

def plot_cai_p3_contourlines(x_bin,y_bin,data,xy_data,
                             graph_name='cai_contourlines.png',
                             prob_axes=True, **kwargs):
    """Draws a contour-line plot of CAI (y axis) against P3 (x axis).

    x_bin, y_bin, data: bins and data matrix; bundled as a single
    (x_bin, y_bin, data) series for plot_contour_lines.

    xy_data: accepted but not used by this function.

    graph_name: file to write (default 'cai_contourlines.png'). If None,
    the figure is drawn but not saved.

    prob_axes: handed to plot_contour_lines (default True). When true,
    the axes are also reset with set_axis_to_probs after drawing, as the
    other contour functions in this module do.

    **kwargs: forwarded unchanged to plot_contour_lines. Earlier notes
    for this function list label, num_genes and size as options --
    verify against plot_contour_lines.
    """
    plot_data = [(x_bin, y_bin, data)]
    plot_contour_lines(plot_data, graph_grid='/', x_label="$P_3$",
        y_label="CAI", prob_axes=prob_axes, **kwargs)
    #this was the only contour function that did not re-apply the
    #probability axes after drawing; do it here too, but only when the
    #caller asked for probability axes so prob_axes=False is unaffected
    if prob_axes:
        set_axis_to_probs()
    if graph_name is not None:
        savefig(graph_name)

def plot_p12_p3_contour(x_bin,y_bin,data,xy_data,
                        graph_name='p12_p3_contour.png',
                        prob_axes=True, **kwargs):
    """Draws a filled contour plot of P12 (y axis) against P3 (x axis).

    x_bin, y_bin, data: bins and data matrix; bundled as a single
    (x_bin, y_bin, data) series for plot_filled_contour.

    xy_data: accepted but not used by this function.

    graph_name: file to write (default 'p12_p3_contour.png'). If None,
    the figure is drawn but not saved.

    prob_axes: handed to plot_filled_contour (default True).

    **kwargs: forwarded unchanged to plot_filled_contour. Earlier notes
    for this function list label, num_genes and size as options, and
    mention a mean equation on the plot -- verify against
    plot_filled_contour.
    """
    contour_series = [(x_bin, y_bin, data)]
    plot_filled_contour(
        contour_series,
        graph_grid='/',
        x_label="$P_3$",
        y_label="$P_{12}$",
        prob_axes=prob_axes,
        **kwargs)
    #axes are reset after drawing, regardless of prob_axes
    set_axis_to_probs()
    if graph_name is None:
        return
    savefig(graph_name)

def plot_p12_p3_contourlines(x_bin,y_bin,data,xy_data, prob_axes=True,\
    graph_name='p12_p3_contourlines.png', **kwargs):
    """Draws a contour-line plot of P12 (y axis) against P3 (x axis).

    x_bin, y_bin, data: bins and data matrix; bundled as a single
    (x_bin, y_bin, data) series for plot_contour_lines.

    xy_data: accepted but not used by this function.

    prob_axes: handed to plot_contour_lines (default True).

    graph_name: file to write (default 'p12_p3_contourlines.png'). If
    None, the figure is drawn but not saved.

    **kwargs: forwarded unchanged to plot_contour_lines. Earlier notes
    for this function list label, num_genes and size as options, and
    mention a mean equation on the plot -- verify against
    plot_contour_lines.
    """
    contour_series = [(x_bin, y_bin, data)]
    plot_contour_lines(
        contour_series,
        graph_grid='/',
        x_label="$P_3$",
        y_label="$P_{12}$",
        prob_axes=prob_axes,
        **kwargs)
    #axes are reset after drawing, regardless of prob_axes
    set_axis_to_probs()
    if graph_name is None:
        return
    savefig(graph_name)

#Other graphs

def plot_pr2_bias(data, title='ALANINE', graph_name='pr2_bias.png', \
    num_genes='ignored', **kwargs):
    """Draws a PR2-bias plot (six series against a shared x) and saves it.

    The six series, with colors and symbols coded from Sueoka 2002, are:
    -isotypic transversions (base swapping):
        G3/(G3+C3) and A3/(A3+T3)   black line, filled circle / triangle
    -transitions (deaminations):
        G3/(G3+A3) and C3/(C3+T3)   red line, filled circle / triangle
    -allotypic transversions (G-oxidations):
        G3/(G3+T3) and C3/(C3+A3)   black line, white circle / triangle

    data: 2D array with columns in the order
    x, G3/(G3+C3), A3/(A3+T3), G3/(G3+A3), C3/(C3+T3), G3/(G3+T3),
    C3/(C3+A3)

    title: amino acid name written out in caps; it is used as the graph
    title and as the key into aa_labels for the codon label printed in
    the lower right corner, so it has to be one of:
        C2 type: ALANINE (GCN), PROLINE (CCN), SERINE4 (TCN),
                 THREONINE (ACN)
        G2 type: ARGININE4 (CGN), GLYCINE (GGN)
        T2 type: LEUCINE4 (CTN), VALINE (GTN)
    Any other value raises KeyError. Default 'ALANINE'.

    graph_name: file to write (default 'pr2_bias.png'). If None, the
    figure is drawn but not saved.

    num_genes: number of genes contributing to graph, currently ignored.

    **kwargs: forwarded unchanged to init_graph_display.
    """
    #set up the graph display; this also yields the font used for the label
    font, label_font_size = init_graph_display(
        graph_shape='sqr',
        graph_grid='/',
        x_label="$P_3$",
        y_label="Y axis",
        prob_axes=True,
        title=title,
        **kwargs)
    #marker size follows the label font size, and thus the graph size
    marker_size = (label_font_size-1)

    #one entry per series: data column, format string, extra line options
    series_styles = (
        (1, '-ko', dict(c='k')),
        (2, '-kv', dict(c='k')),
        (3, '-ro', dict(c='r')),
        (4, '-rv', dict(c='r')),
        (5, '-wo', dict(c='k', mfc='w')),
        (6, '-wv', dict(c='k', mfc='w')),
    )
    for column, line_format, line_options in series_styles:
        plot(data[:,0], data[:,column], line_format,
             **dict(line_options, markersize=marker_size))

    #codon label for the amino acid being graphed, lower right corner
    codon_label = aa_labels[title]
    text(.95, .05, codon_label, font, verticalalignment='bottom',
         horizontalalignment='right')

    #fix the axes and output the figure
    set_axis_to_probs()
    if graph_name is None:
        return
    savefig(graph_name)