/usr/share/octave/packages/statistics-1.2.3/anderson_darling_test.m is in octave-statistics 1.2.3-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 | ## Author: Paul Kienzle <pkienzle@users.sf.net>
## This program is granted to the public domain.
## -*- texinfo -*-
## @deftypefn {Function File} {[@var{q}, @var{Asq}, @var{info}] = } = @
## anderson_darling_test (@var{x}, @var{distribution})
##
## Test the hypothesis that @var{x} is selected from the given distribution
## using the Anderson-Darling test. If the returned @var{q} is small, reject
## the hypothesis at the @var{q}*100% level.
##
## The Anderson-Darling @math{@var{A}^2} statistic is calculated as follows:
##
## @example
## @iftex
## A^2_n = -n - \sum_{i=1}^n (2i-1)/n log(z_i (1-z_{n-i+1}))
## @end iftex
## @ifnottex
## n
## A^2_n = -n - SUM (2i-1)/n log(@math{z_i} (1 - @math{z_@{n-i+1@}}))
## i=1
## @end ifnottex
## @end example
##
## where @math{z_i} is the ordered position of the @var{x}'s in the CDF of the
## distribution. Unlike the Kolmogorov-Smirnov statistic, the
## Anderson-Darling statistic is sensitive to the tails of the
## distribution.
##
## The @var{distribution} argument must be a either @t{"uniform"}, @t{"normal"},
## or @t{"exponential"}.
##
## For @t{"normal"}' and @t{"exponential"} distributions, estimate the
## distribution parameters from the data, convert the values
## to CDF values, and compare the result to tabluated critical
## values. This includes an correction for small @var{n} which
## works well enough for @var{n} >= 8, but less so from smaller @var{n}. The
## returned @code{info.Asq_corrected} contains the adjusted statistic.
##
## For @t{"uniform"}, assume the values are uniformly distributed
## in (0,1), compute @math{@var{A}^2} and return the corresponding @math{p}-value from
## @code{1-anderson_darling_cdf(A^2,n)}.
##
## If you are selecting from a known distribution, convert your
## values into CDF values for the distribution and use @t{"uniform"}.
## Do not use @t{"uniform"} if the distribution parameters are estimated
## from the data itself, as this sharply biases the @math{A^2} statistic
## toward smaller values.
##
## [1] Stephens, MA; (1986), "Tests based on EDF statistics", in
## D'Agostino, RB; Stephens, MA; (eds.) Goodness-of-fit Techinques.
## New York: Dekker.
##
## @seealso{anderson_darling_cdf}
## @end deftypefn
function [q,Asq,info] = anderson_darling_test(x,dist)
if size(x,1) == 1, x=x(:); end
x = sort(x);
n = size(x,1);
use_cdf = 0;
# Compute adjustment and critical values to use for stats.
switch dist
case 'normal',
# This expression for adj is used in R.
# Note that the values from NIST dataplot don't work nearly as well.
adj = 1 + (.75 + 2.25/n)/n;
qvals = [ 0.1, 0.05, 0.025, 0.01 ];
Acrit = [ 0.631, 0.752, 0.873, 1.035];
x = stdnormal_cdf(zscore(x));
case 'uniform',
## Put invalid data at the limits of the distribution
## This will drive the statistic to infinity.
x(x<0) = 0;
x(x>1) = 1;
adj = 1.;
qvals = [ 0.1, 0.05, 0.025, 0.01 ];
Acrit = [ 1.933, 2.492, 3.070, 3.857 ];
use_cdf = 1;
case 'XXXweibull',
adj = 1 + 0.2/sqrt(n);
qvals = [ 0.1, 0.05, 0.025, 0.01 ];
Acrit = [ 0.637, 0.757, 0.877, 1.038];
## XXX FIXME XXX how to fit alpha and sigma?
x = wblcdf (x, ones(n,1)*sigma, ones(n,1)*alpha);
case 'exponential',
adj = 1 + 0.6/n;
qvals = [ 0.1, 0.05, 0.025, 0.01 ];
# Critical values depend on n. Choose the appropriate critical set.
# These values come from NIST dataplot/src/dp8.f.
Acritn = [
0, 1.022, 1.265, 1.515, 1.888
11, 1.045, 1.300, 1.556, 1.927;
21, 1.062, 1.323, 1.582, 1.945;
51, 1.070, 1.330, 1.595, 1.951;
101, 1.078, 1.341, 1.606, 1.957;
];
# FIXME: consider interpolating in the critical value table.
Acrit = Acritn(lookup(Acritn(:,1),n),2:5);
lambda = 1./mean(x); # exponential parameter estimation
x = expcdf(x, 1./(ones(n,1)*lambda));
otherwise
# FIXME consider implementing more of distributions; a number
# of them are defined in NIST dataplot/src/dp8.f.
error("Anderson-Darling test for %s not implemented", dist);
endswitch
if any(x<0 | x>1)
error('Anderson-Darling test requires data in CDF form');
endif
i = [1:n]'*ones(1,size(x,2));
Asq = -n - sum( (2*i-1) .* (log(x) + log(1-x(n:-1:1,:))) )/n;
# Lookup adjusted critical value in the cdf (if uniform) or in the
# the critical table.
if use_cdf
q = 1-anderson_darling_cdf(Asq*adj, n);
else
idx = lookup([-Inf,Acrit],Asq*adj);
q = [1,qvals](idx);
endif
if nargout > 2,
info.Asq = Asq;
info.Asq_corrected = Asq*adj;
info.Asq_critical = [100*(1-qvals); Acrit]';
info.p = 1-q;
info.p_is_precise = use_cdf;
endif
endfunction
%!demo
%! c = anderson_darling_test(10*rande(12,10000),'exponential');
%! tabulate(100*c,100*[unique(c),1]);
%! % The Fc column should report 100, 250, 500, 1000, 10000 more or less.
%!demo
%! c = anderson_darling_test(randn(12,10000),'normal');
%! tabulate(100*c,100*[unique(c),1]);
%! % The Fc column should report 100, 250, 500, 1000, 10000 more or less.
%!demo
%! c = anderson_darling_test(rand(12,10000),'uniform');
%! hist(100*c,1:2:99);
%! % The histogram should be flat more or less.
|