This file is indexed.

/usr/share/octave/packages/statistics-1.3.0/anderson_darling_test.m is in octave-statistics 1.3.0-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
## Author: Paul Kienzle <pkienzle@users.sf.net>
## This program is granted to the public domain.

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{q}, @var{Asq}, @var{info}] = } = @
## anderson_darling_test (@var{x}, @var{distribution})
##
## Test the hypothesis that @var{x} is selected from the given distribution
## using the Anderson-Darling test.  If the returned @var{q} is small, reject
## the hypothesis at the @var{q}*100% level.
##
## The Anderson-Darling @math{@var{A}^2} statistic is calculated as follows:
##
## @example
## @iftex
## A^2_n = -n - \sum_{i=1}^n (2i-1)/n log(z_i (1-z_{n-i+1}))
## @end iftex
## @ifnottex
##               n
## A^2_n = -n - SUM (2i-1)/n log(@math{z_i} (1 - @math{z_@{n-i+1@}}))
##              i=1
## @end ifnottex
## @end example
##
## where @math{z_i} is the ordered position of the @var{x}'s in the CDF of the 
## distribution.  Unlike the Kolmogorov-Smirnov statistic, the 
## Anderson-Darling statistic is sensitive to the tails of the 
## distribution.
##
## The @var{distribution} argument must be a either @t{"uniform"}, @t{"normal"},
## or @t{"exponential"}.
##
## For @t{"normal"}' and @t{"exponential"} distributions, estimate the 
## distribution parameters from the data, convert the values 
## to CDF values, and compare the result to tabluated critical 
## values.  This includes an correction for small @var{n} which 
## works well enough for @var{n} >= 8, but less so from smaller @var{n}.  The
## returned @code{info.Asq_corrected} contains the adjusted statistic.
##
## For @t{"uniform"}, assume the values are uniformly distributed
## in (0,1), compute @math{@var{A}^2} and return the corresponding @math{p}-value from
## @code{1-anderson_darling_cdf(A^2,n)}.
## 
## If you are selecting from a known distribution, convert your 
## values into CDF values for the distribution and use @t{"uniform"}.
## Do not use @t{"uniform"} if the distribution parameters are estimated 
## from the data itself, as this sharply biases the @math{A^2} statistic
## toward smaller values.
##
## [1] Stephens, MA; (1986), "Tests based on EDF statistics", in
## D'Agostino, RB; Stephens, MA; (eds.) Goodness-of-fit Techinques.
## New York: Dekker.
##
## @seealso{anderson_darling_cdf}
## @end deftypefn

function [q,Asq,info] = anderson_darling_test(x,dist)

  if size(x,1) == 1, x=x(:); end
  x = sort(x);
  n = size(x,1);
  use_cdf = 0;

  # Compute adjustment and critical values to use for stats.
  switch dist
    case 'normal',
      # This expression for adj is used in R.
      # Note that the values from NIST dataplot don't work nearly as well.
      adj = 1 + (.75 + 2.25/n)/n;
      qvals = [ 0.1, 0.05, 0.025, 0.01 ];
      Acrit = [ 0.631, 0.752, 0.873, 1.035];
      x = stdnormal_cdf(zscore(x));

    case 'uniform',
      ## Put invalid data at the limits of the distribution
      ## This will drive the statistic to infinity.
      x(x<0) = 0;
      x(x>1) = 1;
      adj = 1.;
      qvals = [ 0.1, 0.05, 0.025, 0.01 ];
      Acrit = [ 1.933, 2.492, 3.070, 3.857 ];
      use_cdf = 1;

    case 'XXXweibull',
      adj = 1 + 0.2/sqrt(n);
      qvals = [ 0.1, 0.05, 0.025, 0.01 ];
      Acrit = [ 0.637, 0.757, 0.877, 1.038];
      ## XXX FIXME XXX how to fit alpha and sigma?
      x = wblcdf (x, ones(n,1)*sigma, ones(n,1)*alpha);

    case 'exponential',
      adj = 1 + 0.6/n;
      qvals = [ 0.1, 0.05, 0.025, 0.01 ];
      # Critical values depend on n.  Choose the appropriate critical set.
      # These values come from NIST dataplot/src/dp8.f.
      Acritn = [
                  0, 1.022, 1.265, 1.515, 1.888
                 11, 1.045, 1.300, 1.556, 1.927;
                 21, 1.062, 1.323, 1.582, 1.945;
                 51, 1.070, 1.330, 1.595, 1.951;
                101, 1.078, 1.341, 1.606, 1.957;
                ];
      # FIXME: consider interpolating in the critical value table.
      Acrit = Acritn(lookup(Acritn(:,1),n),2:5);

      lambda = 1./mean(x);  # exponential parameter estimation
      x = expcdf(x, 1./(ones(n,1)*lambda));

    otherwise
      # FIXME consider implementing more of distributions; a number
      # of them are defined in NIST dataplot/src/dp8.f.
      error("Anderson-Darling test for %s not implemented", dist);
  endswitch

  if any(x<0 | x>1)
    error('Anderson-Darling test requires data in CDF form');
  endif

  i = [1:n]'*ones(1,size(x,2));
  Asq = -n - sum( (2*i-1) .* (log(x) + log(1-x(n:-1:1,:))) )/n;

  # Lookup adjusted critical value in the cdf (if uniform) or in the
  # the critical table.
  if use_cdf
    q = 1-anderson_darling_cdf(Asq*adj, n);
  else
    idx = lookup([-Inf,Acrit],Asq*adj);
    q = [1,qvals](idx); 
  endif

  if nargout > 2,
    info.Asq = Asq;
    info.Asq_corrected = Asq*adj;
    info.Asq_critical = [100*(1-qvals); Acrit]';
    info.p = 1-q;
    info.p_is_precise = use_cdf;
  endif
endfunction

%!demo
%! c = anderson_darling_test(10*rande(12,10000),'exponential');
%! tabulate(100*c,100*[unique(c),1]);
%! % The Fc column should report 100, 250, 500, 1000, 10000 more or less.

%!demo
%! c = anderson_darling_test(randn(12,10000),'normal');
%! tabulate(100*c,100*[unique(c),1]);
%! % The Fc column should report 100, 250, 500, 1000, 10000 more or less.

%!demo
%! c = anderson_darling_test(rand(12,10000),'uniform');
%! hist(100*c,1:2:99);
%! % The histogram should be flat more or less.