/usr/lib/python2.7/dist-packages/cogent/maths/stats/kendall.py is in python-cogent 1.9-9.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 | """
Computes kendalls tau statistic and associated probabilities. A wrapper
function is provided in cogent.maths.stats.kendall_correlation
Translated from R 2.5 by Gavin Huttley
"""
from __future__ import division
from numpy import floor, sqrt, array
from cogent.maths.stats.util import Freqs
from cogent.maths.stats.distribution import zprob
__author__ = "Gavin Huttley"
__copyright__ = "Copyright 2007-2016, The Cogent Project"
__credits__ = ["Gavin Huttley", "Daniel McDonald"]
__license__ = "GPL"
__version__ = "1.9"
__maintainer__ = "Gavin Huttley"
__email__ = "Gavin.Huttley@anu.edu.au"
__status__ = "Production"
def as_paired_ranks(x, y):
"""return as matrix of paired ranks"""
n = len(x)
paired = zip(x,y)
x = list(x)
y = list(y)
x.sort()
y.sort()
rank_val_map_x = dict(zip(x, range(n)))
rank_val_map_y = dict(zip(y, range(n)))
ranked = []
for i in range(n):
ranked += [[rank_val_map_x[paired[i][0]], rank_val_map_y[paired[i][1]]]]
return ranked
def ckendall(k, n, w):
# translated from R 2.5
combin = n*(n-1)/2
if k < 0 or k > combin:
return 0
if w[n][k] < 0:
if n == 1:
w[n][k] = k == 0
else:
s = 0
for i in range(n):
result = ckendall(k-i, n-1, w)
s += result
w[n][k] = s
return w[n][k]
def pkendall(x, n, divisor, working):
# translated from R 2.5
q = floor(x + 1e-7)
if q < 0:
x = 0
elif q > n * (n - 1) / 2:
x = 1
else:
p = 0
for k in range(int(q)+1):
result = ckendall(k, n, working)
p += result
x = p / divisor
return x
def kendalls_tau(x, y, return_p=True):
"""returns kendall's tau
Arguments:
- return_p: returns the probability from the normal approximation when
True, otherwise just returns tau"""
ranked = as_paired_ranks(x, y)
n = len(ranked)
denom = n * (n-1) / 2
con = 0
discor = 0
x_tied = 0
y_tied = 0
for i in range(n-1):
x_1 = ranked[i][0]
y_1 = ranked[i][1]
for j in range(i+1, n):
x_2 = ranked[j][0]
y_2 = ranked[j][1]
x_diff = x_1 - x_2
y_diff = y_1 - y_2
if x_diff * y_diff > 0:
con += 1
elif x_diff and y_diff:
discor += 1
else:
if x_diff:
y_tied += 1
if y_diff:
x_tied += 1
diff = con - discor
total = con + discor
denom = ((total + y_tied) * (total + x_tied))**0.5
variance = (4*n+10) / (9*n*(n-1))
tau = diff / denom
stat = tau
if x_tied or y_tied:
x_tied = array([v for v in Freqs(x).itervalues() if v > 1])
y_tied = array([v for v in Freqs(y).itervalues() if v > 1])
t0 = n*(n-1)/2
t1 = sum(x_tied * (x_tied-1)) / 2
t2 = sum(y_tied * (y_tied-1)) / 2
stat = tau * sqrt((t0-t1)*(t0-t2))
v0 = n * (n - 1) * (2 * n + 5)
vt = sum(x_tied * (x_tied - 1) * (2 * x_tied + 5))
vu = sum(y_tied * (y_tied - 1) * (2 * y_tied + 5))
v1 = sum(x_tied * (x_tied - 1)) * sum(y_tied * (y_tied - 1))
v2 = sum(x_tied * (x_tied - 1) * (x_tied - 2)) * \
sum(y_tied * (y_tied - 1) * (y_tied - 2))
variance = (v0 - vt - vu) / 18 + v1 / (2 * n * (n - 1)) + v2 / (9 * n * \
(n - 1) * (n - 2))
if return_p:
return tau, zprob(stat / variance**0.5)
else:
return tau
|