/usr/lib/python2.7/dist-packages/cogent/maths/stats/distribution.py

#!/usr/bin/env python
"""Translations of functions from Release 2.3 of the Cephes Math Library, 
which is (c) Stephen L. Moshier 1984, 1995.
"""
from __future__ import division
from cogent.maths.stats.special import erf, erfc, igamc, igam, betai, log1p, \
    expm1, SQRTH, MACHEP, MAXNUM, PI, ndtri, incbi, igami, fix_rounding_error,\
    ln_binomial
    #ndtri import b/c it should be available via this module

from numpy import sqrt, exp, arctan as atan

__author__ = "Rob Knight"
__copyright__ = "Copyright 2007-2016, The Cogent Project"
__credits__ = ["Rob Knight", "Sandra Smit", "Gavin Huttley", "Daniel McDonald"]
__license__ = "GPL"
__version__ = "1.9"
__maintainer__ = "Rob Knight"
__email__ = "rob@spot.colorado.edu"
__status__ = "Production"

incbet = betai  #shouldn't have renamed it...

#Probability integrals: low gives left-hand tail, high gives right-hand tail.
def z_low(x):
    """Returns left-hand tail of z distribution (0 to x). 
    
    x ranges from -infinity to +infinity; result ranges from 0 to 1
    
    See Cephes docs for details."""
    y = x * SQRTH
    z = abs(y) #distribution is symmetric
    if z < SQRTH:
        return 0.5 + 0.5 * erf(y)
    else:
        if y > 0:
            return 1 - 0.5 * erfc(z)
        else:
            return 0.5 * erfc(z)

def z_high(x):
    """Returns right-hand tail of z distribution (0 to x). 
    
    x ranges from -infinity to +infinity; result ranges from 0 to 1
    
    See Cephes docs for details."""
    y = x * SQRTH
    z = abs(y)
    if z < SQRTH:
        return 0.5 - 0.5 * erf(y)
    else:
        if x < 0:
            return 1 - 0.5 * erfc(z)
        else:
            return 0.5 * erfc(z)

def zprob(x):
    """Returns both tails of z distribution (-inf to -x, inf to x)."""
    return 2 * z_high(abs(x))

def chi_low(x, df):
    """Returns left-hand tail of chi-square distribution (0 to x), given df.

    x ranges from 0 to infinity.
    
    df, the degrees of freedom, ranges from 1 to infinity (assume integers).
    Typically, df is (r-1)*(c-1) for a r by c table.
   
    Result ranges from 0 to 1.
    
    See Cephes docs for details.
    """
    x = fix_rounding_error(x)
    if x < 0:
        raise ValueError, "chi_low: x must be >= 0 (got %s)." % x
    if df < 1:
        raise ValueError, "chi_low: df must be >= 1 (got %s)." % df
    return igam(df/2, x/2)
    
def chi_high(x, df):
    """Returns right-hand tail of chi-square distribution (x to infinity).
    
    df, the degrees of freedom, ranges from 1 to infinity (assume integers).
    Typically, df is (r-1)*(c-1) for a r by c table.
    
    Result ranges from 0 to 1.
    
    See Cephes docs for details.
    """
    x = fix_rounding_error(x)
    
    if x < 0:
        raise ValueError, "chi_high: x must be >= 0 (got %s)." % x
    if df < 1:
        raise ValueError, "chi_high: df must be >= 1 (got %s)." % df
    return igamc(df/2, x/2)
 
def t_low(t, df):
    """Returns left-hand tail of Student's t distribution (-infinity to x).
    
    df, the degrees of freedom, ranges from 1 to infinity.
    Typically, df is (n-1) for a sample size of n.
    
    Result ranges from 0 to 1.
    
    See Cephes docs for details.
    """
    if df < 1:
        raise ValueError, "t_low: df must be >= 1 (got %s)." % df
    return stdtr(df, t)

def t_high(t, df):
    """Returns right-hand tail of Student's t distribution (x to infinity).
    
    df, the degrees of freedom, ranges from 1 to infinity.
    Typically, df is (n-1) for a sample size of n.
    
    Result ranges from 0 to 1.
    
    See Cephes docs for details.
    """
    if df < 1:
        raise ValueError, "t_high: df must be >= 1 (got %s)." % df
    return stdtr(df, -t) #distribution is symmetric

def tprob(t, df):
    """Returns both tails of t distribution (-infinity to -x, infinity to x)"""
    return 2 * t_high(abs(t), df)

def poisson_high(successes, mean):
    """Returns right tail of Poission distribution, Pr(X > x).
    
    successes ranges from 0 to infinity. mean must be positive.
    """
    return pdtrc(successes, mean)

def poisson_low(successes, mean):
    """Returns left tail of Poisson distribution, Pr(X <= x).

    successes ranges from 0 to infinity. mean must be positive.
    """
    return pdtr(successes, mean)

def poisson_exact(successes, mean):
    """Returns Poisson probablity for exactly Pr(X=successes).
    
    Formula is e^-(mean) * mean^(successes) / (successes)!
    """
    if successes == 0:
        return pdtr(0, mean)
    elif successes < mean:  #use left tail
            return pdtr(successes, mean) - pdtr(successes-1, mean)
    else: #successes > mean: use right tail
        return pdtrc(successes-1, mean) - pdtrc(successes, mean)

def binomial_high(successes, trials, prob):
    """Returns right-hand binomial tail (X > successes) given prob(success)."""
    if -1 <= successes < 0:
        return 1
    return bdtrc(successes, trials, prob)

def binomial_low(successes, trials, prob):
    """Returns left-hand binomial tail (X <= successes) given prob(success)."""
    return bdtr(successes, trials, prob)

def binomial_exact(successes, trials, prob):
    """Returns binomial probability of exactly X successes.
    
    Works for integer and floating point values.

    Note: this function is only a probability mass function for integer 
    values of 'trials' and 'successes', i.e. if you sum up non-integer 
    values you probably won't get a sum of 1.
    """
    if (prob < 0) or (prob > 1):
        raise ValueError, "Binomial prob must be between 0 and 1."
    if (successes < 0) or (trials < successes):
        raise ValueError, "Binomial successes must be between 0 and trials."
    return exp(ln_binomial(successes, trials, prob))

def f_low(df1, df2, x):
    """Returns left-hand tail of f distribution (0 to x).

    x ranges from 0 to infinity.
    
    Result ranges from 0 to 1.
    
    See Cephes docs for details.
    """
    return fdtr(df1, df2, x)
    
def f_high(df1, df2, x):
    """Returns right-hand tail of f distribution (x to infinity).
    
    Result ranges from 0 to 1.
    
    See Cephes docs for details.
    """
    return fdtrc(df1, df2, x)
  
def fprob(dfn, dfd, F, side='right'):
    """Returns both tails of F distribution (-inf to F and F to inf)

    Use in case of two-tailed test. Usually this method is called by
    f_two_sample, so you don't have to worry about choosing the right side.
    
    side: right means return twice the right-hand tail of the F-distribution.
        Use in case var(a) > var (b)
          left means return twice the left-hand tail of the F-distribution.
        Use in case var(a) < var(b)
    """
    if F < 0:
        raise ValueError, "fprob: F must be >= 0 (got %s)." % F
    if side=='right':
        return 2*f_high(dfn, dfd, F)
    elif side=='left':
        return 2*f_low(dfn, dfd, F)
    else:
        raise ValueError, "Not a valid value for side %s"%(side)

 
def stdtr(k, t):
    """Student's t distribution, -infinity to t.

    See Cephes docs for details.
    """
    if k <= 0:
        raise ValueError, 'stdtr: df must be > 0.'
    if t == 0:
        return 0.5
    if t < -2:
        rk = k
        z = rk / (rk + t * t)
        return 0.5 * betai(0.5 * rk, 0.5, z)
    #compute integral from -t to + t
    if t < 0:
        x = -t
    else:
        x = t

    rk = k  #degrees of freedom
    z = 1 + (x * x)/rk
    #test if k is odd or even
    if (k & 1) != 0:
        #odd k
        xsqk = x/sqrt(rk)
        p = atan(xsqk)
        if k > 1:
            f = 1
            tz = 1
            j = 3
            while (j <= (k-2)) and ((tz/f) > MACHEP):
                tz *= (j-1)/(z*j)
                f += tz
                j += 2
            p += f * xsqk/z
        p *= 2/PI
    else:
        #even k
        f = 1
        tz = 1
        j = 2
        while (j <= (k-2)) and ((tz/f) > MACHEP):
            tz *= (j-1)/(z*j)
            f += tz
            j += 2
        p = f * x/sqrt(z*rk)
    #common exit
    if t < 0:
        p = -p  #note destruction of relative accuracy
    p = 0.5 + 0.5 * p
    return p

def bdtr(k, n, p):
    """Binomial distribution, 0 through k.

    Uses formula bdtr(k, n, p) = betai(n-k, k+1, 1-p)

    See Cephes docs for details.
    """
    p = fix_rounding_error(p)
    if (p < 0) or (p > 1):
        raise ValueError, "Binomial p must be between 0 and 1."
    if (k < 0) or (n < k):
        raise ValueError, "Binomial k must be between 0 and n."
    if k == n:
        return 1
    dn = n - k
    if k == 0:
        return  pow(1-p, dn)
    else:
        return  betai(dn, k+1, 1-p)

def bdtrc(k, n, p):
    """Complement of binomial distribution, k+1 through n.

    Uses formula bdtrc(k, n, p) = betai(k+1, n-k, p)

    See Cephes docs for details.
    """
    p = fix_rounding_error(p)
    if (p < 0) or (p > 1):
        raise ValueError, "Binomial p must be between 0 and 1."
    if (k < 0) or (n < k):
        raise ValueError, "Binomial k must be between 0 and n."
    if k == n:
        return 0
    dn = n - k
    if k == 0:
        if p < .01:
            dk = -expm1(dn * log1p(-p))
        else:
            dk = 1 - pow(1.0-p, dn)
    else:
        dk = k + 1
        dk = betai(dk, dn, p)
    return dk
    
def pdtr(k, m):
    """Returns sum of left tail of Poisson distribution, 0 through k.

    See Cephes docs for details.
    """
    if k < 0:
        raise ValueError, "Poisson k must be >= 0."
    if m < 0:
        raise ValueError, "Poisson m must be >= 0."
    return igamc(k+1, m)

def pdtrc(k, m):
    """Returns sum of right tail of Poisson distribution, k+1 through infinity.

    See Cephes docs for details.
    """
    if k < 0:
        raise ValueError, "Poisson k must be >= 0."
    if m < 0:
        raise ValueError, "Poisson m must be >= 0."
    return igam(k+1, m)

def fdtr(a, b, x):
    """Returns left tail of F distribution, 0 to x.

    See Cephes docs for details.
    """
    if min(a, b) < 1:
        raise ValueError, "F a and b (degrees of freedom) must both be >= 1."
    if x < 0:
        raise ValueError, "F distribution value of f must be >= 0."
    w = a * x
    w /= float(b + w)
    return betai(0.5 * a, 0.5 * b, w)
    

def fdtrc(a, b, x):
    """Returns right tail of F distribution, x to infinity.

    See Cephes docs for details.
    """
    if min(a, b) < 1:
        raise ValueError, "F a and b (degrees of freedom) must both be >= 1."
    if x < 0:
        raise ValueError, "F distribution value of f must be >= 0."
    w = float(b) / (b + a * x)
    return betai(0.5 * b, 0.5 * a, w)

def gdtr(a, b, x):
    """Returns integral from 0 to x of Gamma distribution with params a and b.
    """
    if x < 0.0:
        raise ZeroDivisionError, "x must be at least 0."
    return igam( b, a * x)

def gdtrc(a, b, x):
    """Returns integral from x to inf of Gamma distribution with params a and b.
    """
    if x < 0.0:
        raise ZeroDivisionError, "x must be at least 0."
    return igamc(b, a * x)

#note: ndtri for the normal distribution is already imported

def chdtri(df, y):
    """Returns inverse of chi-squared distribution."""
    y = fix_rounding_error(y)
    if(y < 0.0 or y > 1.0 or df < 1.0):
        raise ZeroDivisionError, "y must be between 0 and 1; df >= 1"
    return 2 * igami(0.5*df, y)

def stdtri(k, p):
    """Returns inverse of Student's t distribution. k = df."""
    p = fix_rounding_error(p)
    # handle easy cases
    if k <= 0 or p < 0.0 or p > 1.0:
        raise ZeroDivisionError, "k must be >= 1, p between 1 and 0."
    rk = k
    #handle intermediate values
    if p > 0.25 and p < 0.75:
        if p == 0.5:
            return 0.0
        z = 1.0 - 2.0 * p;
        z = incbi(0.5, 0.5*rk, abs(z))
        t = sqrt(rk*z/(1.0-z))
        if p < 0.5:
            t = -t
        return t
    #handle extreme values
    rflg = -1
    if p >= 0.5:
            p = 1.0 - p;
            rflg = 1
    z = incbi(0.5*rk, 0.5, 2.0*p)

    if MAXNUM * z < rk:
        return rflg * MAXNUM
    t = sqrt(rk/z - rk)
    return rflg * t

def pdtri(k, p):
    """Inverse of Poisson distribution.

    Finds Poission mean such that integral from 0 to k is p.
    """
    p = fix_rounding_error(p)
    if k < 0 or p < 0.0 or p >= 1.0:
        raise ZeroDivisionError, "k must be >=0, p between 1 and 0."
    v = k+1;
    return igami(v, p)

def bdtri(k, n, y):
    """Inverse of binomial distribution.

    Finds binomial p such that sum of terms 0-k reaches cum probability y.
    """
    y = fix_rounding_error(y)
    if y < 0.0 or y > 1.0:
        raise ZeroDivisionError, "y must be between 1 and 0."
    if k < 0 or n <= k:
        raise ZeroDivisionError, "k must be between 0 and n"
    dn = n - k
    if k == 0:
        if y > 0.8:
            p = -expm1(log1p(y-1.0) / dn)
        else:
            p = 1.0 - y**(1.0/dn)
    else:
        dk = k + 1;
        p = incbet(dn, dk, 0.5)
        if p > 0.5:
            p = incbi(dk, dn, 1.0-y)
        else:
            p = 1.0 - incbi(dn, dk, y)
    return p

def gdtri(a, b, y):
    """Returns Gamma such that y is the probability in the integral.
    
    WARNING: if 1-y == 1, gives incorrect result. The scipy implementation
    gets around this by using cdflib, which is in Fortran. Until someone
    gets around to translating that, only use this function for values of
    p greater than 1e-15 or so!
    """
    y = fix_rounding_error(y)
    if y < 0.0 or y > 1.0 or a <= 0.0 or b < 0.0:
        raise ZeroDivisionError, "a and b must be non-negative, y from 0 to 1."
    return igami(b, 1.0-y) / a

def fdtri(a, b, y):
    """Returns inverse of F distribution."""
    y = fix_rounding_error(y)
    if( a < 1.0 or b < 1.0 or y <= 0.0 or y > 1.0):
        raise ZeroDivisionError, "y must be between 0 and 1; a and b >= 1"
    y = 1.0-y
    # Compute probability for x = 0.5
    w = incbet(0.5*b, 0.5*a, 0.5)
    # If that is greater than y, then the solution w < .5.
    # Otherwise, solve at 1-y to remove cancellation in (b - b*w).
    if w > y or y < 0.001:
            w = incbi(0.5*b, 0.5*a, y)
            x = (b - b*w)/(a*w)
    else:
            w = incbi(0.5*a, 0.5*b, 1.0-y)
            x = b*w/(a*(1.0-w))
    return x
python-cogent 1.9-9 / usr / lib / python2.7 / dist-packages / cogent / maths / stats / distribution.py