/usr/lib/python2.7/dist-packages/Scientific/Statistics/__init__.py is in python-scientific 2.9.4-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 | # Basic statistics functions.
#
# Written by Konrad Hinsen <hinsen@cnrs-orleans.fr>
# With contributions from Moshe Zadka <mzadka@geocities.com>
# last revision: 2006-5-28
#
"""
Basic monovariate statistics
"""
from Scientific import N
#
# Univariate statistics functions
#
def moment(data, order, about=None, theoretical=1):
"""
Calculate the moments of the distribution corresponding to a set of data
@param data: a sequence of data points
@type data: C{Numeric.array} of numbers
@param order: the order of the moment to be calculated
@type order: C{int}
@param about: the center of the distribution. If C{None}, use the
mean value as the center.
@type about: number
@param theoretical: for internal use
@returns: the moment of the given order about the specified center
@rtype: number
"""
data = 1.*N.array(data)
if about is None:
about = mean(data)
theoretical = 0
ln = len(data)-(1-theoretical)
return 1.*N.add.reduce((data-about)**order)/ln
def mean(data):
"""
Mean (average value, first moment)
@param data: a sequence of numbers
@type data: C{list} or C{Numeric.array}
@returns: the mean of the number sequence
@rtype: number
"""
return moment(data, 1, 0)
average = mean
def weightedMean(data, sigma):
"""
Weighted mean of a sequence of numbers with given standard deviations
@param data: a sequence of measurements
@type data: C{list} or C{Numeric.array}
@param sigma: a sequence of corresponding standard deviations
@type sigma: C{list} or C{Numeric.array}
@returns: the weighted mean and corresponding standard deviation
@rtype: C{tuple} of two elements
"""
if len(data) != len(sigma):
raise ValueError
data = 1.*N.array(data)
sigma = 1.*N.array(sigma)
nom = N.sum(data/sigma**2)
denom = N.sum(1./sigma**2)
mean = nom / denom
sig = N.sqrt(1./denom)
return mean, sig
def variance(data):
"""
Variance (second moment)
@param data: a sequence of numbers
@type data: C{list} or C{Numeric.array}
@returns: the variance of the number sequence
@rtype: number
"""
return moment(data, 2)
def standardDeviation(data):
"""
Standard deviation
@param data: a sequence of numbers
@type data: C{list} or C{Numeric.array}
@returns: the standard deviation of the number sequence
@rtype: number
"""
return N.sqrt(variance(data))
def median(data):
"""
Median
@param data: a sequence of numbers
@type data: C{list} or C{Numeric.array}
@returns: the median of the number sequence
@rtype: number
"""
data = N.sort(N.array(data))
l = (len(data)-1)/2.
return (data[int(N.floor(l))]+data[int(N.ceil(l))])/2.
def mode(data):
h = {}
for n in data:
try: h[n] = h[n]+1
except KeyError: h[n] = 1
a = map(lambda x: (x[1], x[0]), h.items())
return max(a)[1]
def normalizedMoment(data, order):
"""
Calculate the normalized moments of the distribution corresponding
to a set of data. Normalization means division by the n-th power of
the standard deviation.
@param data: a sequence of data points
@type data: C{Numeric.array} of numbers
@param order: the order of the moment to be calculated
@type order: C{int}
@returns: the normalized moment of the given order
@rtype: number
"""
mn = mean(data)
return moment(data, order, mn)/N.sqrt(moment(data, 2, mn)**order)
def skewness(data):
"""
Skewness (third normalized moment)
@param data: a sequence of numbers
@type data: C{list} or C{Numeric.array}
@returns: the skewness of the number sequence
@rtype: number
"""
return normalizedMoment(data, 3)
def kurtosis(data):
"""
Kurtosis (fourth normalized moment)
@param data: a sequence of numbers
@type data: C{list} or C{Numeric.array}
@returns: the kurtosis of the number sequence
@rtype: number
"""
return normalizedMoment(data, 4)
## def chiSquare(data):
## h = {}
## for n in data:
## try: h[n] = h[n]+1
## except KeyError: h[n] = 1
## h = N.array(h.values())
## h = h/N.add.reduce(h)
## return moment(h, 2, 1./len(h))
def correlation(data1, data2):
"""
Calculates the correlation coefficient between two data sequences
@param data1: first data sequence
@type data1: C{Numeric.array} or C{list}
@param data2: second data sequence of length identical to data1
@type data2: C{Numeric.array} or C{list}
@returns: the correlation coefficient between data1 and data2
@rtype: number
@raises ValueError: if data1 and data2 have different lengths
"""
if len(data1) != len(data2):
raise ValueError("data series must have equal length")
data1 = N.array(data1)
data2 = N.array(data2)
data1 = data1 - N.add.reduce(data1)/len(data1)
data2 = data2 - N.add.reduce(data2)/len(data2)
return N.add.reduce(data1*data2) / \
N.sqrt(N.add.reduce(data1*data1) \
* N.add.reduce(data2*data2))
|