/usr/share/euler/progs/statist.e is in euler 1.61.0-10.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 | comment
Statistical Functions
endcomment
function meandev (x,v=0)
## Return the mean value and the standard deviation of x1,...,xn.
## E.g. {m,d}=mean([1,2,3,3,4,2,5]).
## An additional parameter v may contain the multiplicities of x.
## E.g. {m,d}=mean([1,2,3,4,5],[1,2,2,1,1]) (same example).
## m=mean(x) will assign the mean value only!
if (argn()==2)
n=sum(v);
m=sum(x*v)/n;
d=sqrt((sum(x*x*v)-n*m*m)/(n-1));
else
n=cols(x);
m=sum(x)/n;
d=sqrt((sum(x*x)-n*m*m)/(n-1));
endif;
return {m,d};
endfunction
function mean (x)
## Returns the mean value of x.
## An additional parameter may contain multiplicities.
## See: meandev.
{m,d}=meandev(x,args()); return m;
endfunction
function dev (x)
## Returns the standard deviation of x.
## An additional parameter may contain multiplicities.
## See: meandev.
{m,d}=meandev(x,args()); return d;
endfunction
function qnormal (x,m,d)
## Returns the normal density with mean m and deviation d.
return 1/(d*sqrt(2*pi))*exp(-(x-m)^2/2/d^2);
endfunction
function xplotrange (r,v)
## Plots a barplot of the multiplicities v[i] in the ranges
## r[i],r[i+1]. r must be one longer than v.
## You can set the plot area with setplot beforehand.
s=scaling(1);
if s;
setplot(min(r),max(r),0,max(v));
endif;
scaling(s);
n=cols(v);
xplotbar(r[1:n],0,r[2:n+1]-r[1:n],v);
return plot();
endfunction
function chitest (x,y)
## Perform a xhi^2 test, if x obeys the distribution y.
## Return the error, if you reject this.
return 1-chidis(sum((x-y)^2/y),cols(x)-1);
endfunction
function testnormal (r,n,v,m,d)
## Test the data v[i] in the ranges r[i],r[i+1] against
## the normal deviation with mean m and deviation d,
## using the xhi^2 method.
## n is the total number of data (including those
## less than r[1] and larger than r[n]).
## Return the error you have, if you reject normal distribution.
t=normaldis((r-m)/d);
k=cols(v); p=(t[2:k+1]-t[1:k])*n;
return chitest(v,p);
endfunction
function ttest (m,d,n,mu)
## Test, if the measured mean m with deviation d of n data
## can come from a distribution with mean value mu.
## Returns the error you have, if you reject this.
return 1-tdis(abs(m-mu)/d*sqrt(n),n-1);
endfunction
function tcompare (m1,d1,n1,m2,d2,n2)
## Test, if two measured data with means mi, deviation di
## of ni data (i=1,2), aggree in mean.
## The data must be normally distributed.
## Returns the error you have, if you reject this.
h1=d1^2/n1; h2=d2^2/n2;
return 1-tdis(abs(m1-m2)/sqrt(h1+h2), ..
(h1+h2)^2/(h1^2/(n1+1)+h2^2/(n2+1))-2);
endfunction
function tcomparedata (x,y)
## Compare x and y on same mean, where x and y are
## normally distributed with different deviation.
## Return the error you have, if you reject this.
return tcompare(mean(x),dev(x),cols(x),mean(y),dev(y),cols(y));
endfunction
function tabletest (A)
## Test the results a[i,j] for independence of the rows
## from the columns. (chi^2 test)
## This should only be used for large table entries.
## Return the error you have, if you reject independence.
c=sum(A); r=sum(A')';
E=c*r/sum(r);
return 1-chidis(totalsum((A-E)^2/E),(cols(A)-1)*(rows(A)-1));
endfunction
function varanalysis
## Test the normally distributed data x1,x2,x3,... for same
## mean value.
## varanalysis(x1,x2,x3,...), where each xi is a row vector.
## Returns the error you have, if you reject same mean.
md=[mean(arg1),cols(arg1)];
s=sum((arg1-md[1])^2);
loop 2 to argn();
x=args(#); mx=mean(x);
md=md_[mx,cols(x)];
s=s+sum((x-mx)^2);
end;
md=md'; n=sum(md[2]); mt=sum(md[1]*md[2])/n;
s1=sum(md[2]*(md[1]-mt)^2);
return 1-fdis((s1/(argn()-1))/(s/(n-argn())),argn()-1,n-argn());
endfunction
function binsum1 (i,n,p)
if i>=n; return 1; endif;
d=sqrt(n*p*(1-p));
i1=n*p-10*d;
if i1<0; i1=0; endif;
if i<i1; return 0; endif;
if i>n*p+10*d; return 1; endif;
s=n*log(1-p);
if i>=1;
j=1:i+1;
s=cumsum(s|(log((n-j+1)/j*p/(1-p))));
endif;
return sum(exp(s[floor(i1):i+1]));
endfunction
function binsum (i,n,p)
## Compute the probablitiy of getting i or less hits
## in n runs, where the probability for each hit is p.
## Works even for large i and n, but is ineffective then.
## One may use the normalsum function for large n and
## medium p.
return map("binsum1",i,n,p);
endfunction
function normalsum (i,n,p)
## Works like binsum, but is much faster for large n
## and medium p.
return normaldis((i+0.5-n*p)/sqrt(n*p*(1-p)));
endfunction
function invbinsum1 (x,n,p)
if x<=0; return 0; endif;
if x>=1; return n; endif;
s=n*log(1-p);
d=sqrt(n*p*(1-p));
i1=floor(n*p-10*d); i2=floor(n*p+10*d);
if i1<0; i1=0; endif;
if i2>n; i2=n; endif;
j=1:i2;
s=cumsum(s|(log((n-j+1)/j*p/(1-p))));
return find(cumsum(exp(s[i1:length(s)])),x)+i1;
endfunction
function invbinsum (x,n,p)
## Computes i, such that binomialsum(i,n,p) is just larger than x.
return map("invbinsum1",x,n,p)
endfunction
function hypergeomsum1 (i,n,i1,n1)
if i<i1-(n1-n); return 0; endif;
if i>=i1; return 1; endif;
if i1<=n1-n;
s=logbin(n1-n,i1)-logbin(n1,i1);
if i>=1;
j=1:i;
s=cumsum(s|(log(n-j+1)-log(j)+log((i1-j+1))-log((n1-n-i1+j))));
endif;
else
s=log(bin(n,i1-(n1-n)))-log(bin(n1,i1));
if i>i1-(n1-n);
j=(i1-(n1-n)+1):i;
s=cumsum(s|(log(n-j+1)-log(j)+log(i1-j+1)-log(n1-n-i1+j)));
endif;
endif
return sum(exp(s));
endfunction
function hypergeomsum (i,n,itot,ntot)
## Return the probability of hitting i or less black balls, if
## n are chosen out of ntot, and there are a total of itot black
## balls (and ntot-itot white balls).
return map("hypergeomsum1",i,n,itot,ntot);
endfunction
function mediantest (a,b)
## Test the two distributions a and b on equal mean value.
## To do this both distributions are checked on exceeding
## the median of both.
## Returns the error you have, if you assume a does not
## exceed the median to often (i.e. a may be equal to b)
c=sort(a|b); n=cols(c);
if mod(n,2)==0; m=(c[n/2]+c[n/2+1])/2;
else m=c[n/2+1];
endif;
return 1-hypergeomsum(sum(a<m)-1,cols(a),floor(n/2),n);
endfunction
function ranktest (a,b,eps=sqrt(epsilon()))
## The Mann-Whitney test tests a and b on same distribution.
## Return the error you have, if you reject same distribution.
n1=cols(a); n2=cols(b); n=n1+n2;
{c,i}=sort(a|b); R=1:n;
{c1,i1}=sort(c+eps*(n:-1:1));
R=(R+R[i1])/2;
R1=sum(R*(i<=n1)); R2=sum(R*(i>n1));
return 1-normaldis(abs((min(R1-n1*(n1+1)/2,R2-n2*(n2+1)/2)-n1*n2/2)/ ..
sqrt(n1*n2*(n1+n2+1)/12)));
endfunction
function signtest (a,b)
## Assume a(i) and b(i) are results of treatment a and b at i.
## a and b must be row vectors of equal length.
## Test, if a is not better than b.
## Return the error you have, if you decide that a is better
## than b.
n=cols(a); i=sum(a>b);
return 1-binsum(i-1,n,0.5);
endfunction
function wilcoxon (a,b,eps=sqrt(epsilon()))
## This is a sharper test for the same problem as in signtest.
## See: signtest
## Returns the error you have, if you decide that a is better
## than b.
d=a-b; n=cols(d);
{c,i}=sort(abs(d)); R=1:n;
{c1,i1}=sort(c+eps*(n:-1:1));
R=(R+R[i1])/2;
R1=sum(R*(d[i]<0)); R2=sum(R*(d[i]>=0));
W=R2;
return 1-normaldis((W-n*(n+1)/4)/ ..
sqrt(n*(n+1)*(2*n+1)/24)));
endfunction
|