This file is indexed.

/usr/lib/python2.7/dist-packages/cogent/maths/stats/kendall.py is in python-cogent 1.9-9.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
"""
Computes kendalls tau statistic and associated probabilities. A wrapper
function is provided in cogent.maths.stats.kendall_correlation

Translated from R 2.5 by Gavin Huttley
"""

from __future__ import division
from numpy import floor, sqrt, array
from cogent.maths.stats.util import Freqs
from cogent.maths.stats.distribution import zprob

__author__ = "Gavin Huttley"
__copyright__ = "Copyright 2007-2016, The Cogent Project"
__credits__ = ["Gavin Huttley", "Daniel McDonald"]
__license__ = "GPL"
__version__ = "1.9"
__maintainer__ = "Gavin Huttley"
__email__ = "Gavin.Huttley@anu.edu.au"
__status__ = "Production"

def as_paired_ranks(x, y):
    """return as matrix of paired ranks"""
    n = len(x)
    paired = zip(x,y)
    x = list(x)
    y = list(y)
    x.sort()
    y.sort()
    rank_val_map_x = dict(zip(x, range(n)))
    rank_val_map_y = dict(zip(y, range(n)))
    ranked = []
    for i in range(n):
        ranked += [[rank_val_map_x[paired[i][0]], rank_val_map_y[paired[i][1]]]]
    return ranked

def ckendall(k, n, w):
    # translated from R 2.5
    combin = n*(n-1)/2
    if k < 0 or k > combin:
        return 0
    if w[n][k] < 0:
        if n == 1:
            w[n][k] = k == 0
        else:
            s = 0
            for i in range(n):
                result = ckendall(k-i, n-1, w)
                s += result
            w[n][k] = s
    return w[n][k]

def pkendall(x, n, divisor, working):
    # translated from R 2.5
    q = floor(x + 1e-7)
    if q < 0:
        x = 0
    elif q > n * (n - 1) / 2:
        x = 1
    else:
        p = 0
        for k in range(int(q)+1):
            result = ckendall(k, n, working)
            p += result
        x = p / divisor
    return x

def kendalls_tau(x, y, return_p=True):
    """returns kendall's tau
    
    Arguments:
        - return_p: returns the probability from the normal approximation when
          True, otherwise just returns tau"""
    ranked = as_paired_ranks(x, y)
    n = len(ranked)
    denom = n * (n-1) / 2
    con = 0
    discor = 0
    x_tied = 0
    y_tied = 0
    for i in range(n-1):
        x_1 = ranked[i][0]
        y_1 = ranked[i][1]
        for j in range(i+1, n):
            x_2 = ranked[j][0]
            y_2 = ranked[j][1]
            x_diff = x_1 - x_2
            y_diff = y_1 - y_2
            if x_diff * y_diff > 0:
                con += 1
            elif x_diff and y_diff:
                discor += 1
            else:
                if x_diff:
                    y_tied += 1
                if y_diff:
                    x_tied += 1
    
    diff = con - discor
    total = con + discor
    denom = ((total + y_tied) * (total + x_tied))**0.5
    variance = (4*n+10) / (9*n*(n-1))
    tau = diff / denom
    stat = tau
    
    if x_tied or y_tied:
        x_tied = array([v for v in Freqs(x).itervalues() if v > 1])
        y_tied = array([v for v in Freqs(y).itervalues() if v > 1])
        t0 = n*(n-1)/2
        t1 = sum(x_tied * (x_tied-1)) / 2
        t2 = sum(y_tied * (y_tied-1)) / 2
        stat = tau * sqrt((t0-t1)*(t0-t2))
        v0 = n * (n - 1) * (2 * n + 5)
        vt = sum(x_tied * (x_tied - 1) * (2 * x_tied + 5))
        vu = sum(y_tied * (y_tied - 1) * (2 * y_tied + 5))
        v1 = sum(x_tied * (x_tied - 1)) * sum(y_tied * (y_tied - 1))
        v2 = sum(x_tied * (x_tied - 1) * (x_tied - 2)) * \
               sum(y_tied * (y_tied - 1) * (y_tied - 2))
        variance = (v0 - vt - vu) / 18 + v1 / (2 * n * (n - 1)) + v2 / (9 * n * \
                                                        (n - 1) * (n - 2))
    if return_p:
        return tau, zprob(stat / variance**0.5)
    else:
        return tau