/usr/share/pyshared/mvpa/measures/anova.py is in python-mvpa 0.4.8-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
# See COPYING file distributed along with the PyMVPA package for the
# copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""FeaturewiseDatasetMeasure performing a univariate ANOVA."""
__docformat__ = 'restructuredtext'
import numpy as N
from mvpa.measures.base import FeaturewiseDatasetMeasure
# TODO: Extend with access to functionality from scipy.stats?
# For binary:
# 2-sample Kolmogorov-Smirnov might be interesting
# (scipy.stats.ks_2samp) to judge if two conditions are derived
# from different distributions (take it as 'activity' vs 'rest'),
#
# For binary+multiclass:
# Kruskal-Wallis H-test (scipy.stats.kruskal)
#
# and maybe some others
class OneWayAnova(FeaturewiseDatasetMeasure):
    """`FeaturewiseDatasetMeasure` that performs a univariate ANOVA.

    F-scores are computed for each feature as the standard fraction of between
    and within group variances. Groups are defined by samples with unique
    labels.

    No statistical testing is performed, but raw F-scores are returned as a
    sensitivity map. As usual F-scores have a range of [0,inf] with greater
    values indicating higher sensitivity.
    """

    def _call(self, dataset, labels=None):
        """Compute featurewise F-scores for `dataset`.

        :Parameters:
          dataset
            Object providing `samples` (samples along the first axis),
            `nsamples` and, unless `labels` is given, `labels`.
          labels
            Optional per-sample group labels (any sequence) overriding
            `dataset.labels`.

        :Returns:
          Array of F-scores, i.e. `samples` with its first axis collapsed.
          NaN scores (e.g. 0/0 for a feature without any variance) are
          replaced by 0.
        """
        # This code is based on SciPy's stats.f_oneway()
        # Copyright (c) Gary Strangman. All rights reserved
        # License: BSD
        #
        # However, it got tweaked and optimized to better fit into PyMVPA.

        if labels is None:
            labels = dataset.labels
        # A plain list/tuple would turn `labels == l` below into a single
        # scalar comparison instead of a per-sample mask.
        labels = N.asarray(labels)

        # number of groups
        ul = N.unique(labels)
        na = len(ul)
        bign = float(dataset.nsamples)

        alldata = N.asarray(dataset.samples)
        # The sums below are squared and divided in place.  For integer
        # (or boolean) samples that would overflow and make the in-place
        # true division fail or truncate, so compute in floating point.
        # Floating point input keeps its dtype.
        if not N.issubdtype(alldata.dtype, N.inexact):
            alldata = alldata.astype(float)

        # total squares of sums
        sostot = N.sum(alldata, axis=0)
        sostot *= sostot
        sostot /= bign

        # total sum of squares
        sstot = N.sum(alldata * alldata, axis=0) - sostot

        # between group sum of squares
        ssbn = 0
        for l in ul:
            # all samples for the respective label
            d = alldata[labels == l]
            sos = N.sum(d, axis=0)
            sos *= sos
            ssbn += sos / float(len(d))

        ssbn -= sostot
        # within
        sswn = sstot - ssbn

        # degrees of freedom
        dfbn = na - 1
        dfwn = bign - na

        # mean sums of squares
        msb = ssbn / float(dfbn)
        msw = sswn / float(dfwn)
        f = msb / msw

        # assure no NaNs -- otherwise it leads instead of
        # sane unittest failure (check of NaNs) to crazy
        #   File "mtrand.pyx", line 1661, in mtrand.shuffle
        #   TypeError: object of type 'numpy.int64' has no len()
        # without any sane backtrace
        f[N.isnan(f)] = 0

        # XXX maybe also compute p-values?
        #     e.g. scipy.stats.fprob(dfbn, dfwn, f)
        return f
class CompoundOneWayAnova(OneWayAnova):
    """Compound comparisons via univariate ANOVA.

    Provides F-scores per each label if compared to the other labels.
    """

    def _call(self, dataset):
        """Computes featurewise f-scores using compound comparisons.

        For every entry of `dataset.uniquelabels` the samples are split into
        two groups -- samples carrying that label vs. all other samples --
        and `OneWayAnova._call` is run on that binary labeling.

        :Returns:
          Array holding the per-label results stacked and transposed
          (features x labels).
        """
        orig_labels = N.asarray(dataset.labels)

        results = []
        for ul in dataset.uniquelabels:
            # Build a fresh integer labeling for each comparison instead of
            # writing 1/2 into a copy of the original labels: the copy keeps
            # the original dtype, so e.g. boolean labels would collapse both
            # groups into a single one.
            labels = N.where(orig_labels == ul, 1, 2)
            results.append(OneWayAnova._call(self, dataset, labels))

        # features x labels
        return N.array(results).T
|