/usr/share/pyshared/Bio/LogisticRegression.py is in python-biopython 1.58-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

#!/usr/bin/env python
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""
This module provides code for doing logistic regressions.


Classes:
LogisticRegression    Holds information for a LogisticRegression classifier.


Functions:
train        Train a new classifier.
calculate    Calculate the probabilities of each class, given an observation.
classify     Classify an observation into a class.
"""

import numpy
import numpy.linalg

class LogisticRegression(object):
    """Holds information necessary to do logistic regression
    classification.

    Members:
    beta    List of the weights for each dimension.

    """
    def __init__(self):
        """Initialize the classifier with an empty list of weights."""
        self.beta = []

def train(xs, ys, update_fn=None, typecode=None):
    """train(xs, ys[, update_fn]) -> LogisticRegression
    
    Train a logistic regression classifier on a training set.  xs is a
    list of observations and ys is a list of the class assignments,
    which should be 0 or 1.  xs and ys should contain the same number
    of elements.  update_fn is an optional callback function that
    takes as parameters that iteration number and log likelihood.
    
    """
    if len(xs) != len(ys):
        raise ValueError("xs and ys should be the same length.")
    classes = set(ys)
    if classes != set([0, 1]):
        raise ValueError("Classes should be 0's and 1's")
    if typecode is None:
        typecode = 'd'

    # Dimensionality of the data is the dimensionality of the
    # observations plus a constant dimension.
    N, ndims = len(xs), len(xs[0]) + 1
    if N == 0 or ndims == 1:
        raise ValueError("No observations or observations of dimension 0.")

    # Make an X array, with a constant first dimension.
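    # (The leading column of ones lets beta[0] act as the intercept term.)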
    X = numpy.ones((N, ndims), typecode)
    X[:, 1:] = xs
    Xt = numpy.transpose(X)
    y = numpy.asarray(ys, typecode)

    # Initialize the beta parameter to 0.
    beta = numpy.zeros(ndims, typecode)

    MAX_ITERATIONS = 500
    CONVERGE_THRESHOLD = 0.01
    stepsize = 1.0
    # Now iterate using Newton-Raphson until the log-likelihoods
    # converge.
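    # Each iteration solves (Xt W X) * delta = Xt * (y - p) for the
    # Newton step delta and updates beta <- beta + delta, halving the
    # step size whenever the log likelihood decreases.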
    i = 0
    old_beta = old_llik = None
    while i < MAX_ITERATIONS:
        # Calculate the probabilities.  p = e^(beta X) / (1+e^(beta X))
        ebetaX = numpy.exp(numpy.dot(beta, Xt))
        p = ebetaX / (1+ebetaX)
        
        # Find the log likelihood score and see if I've converged.
        logp = y*numpy.log(p) + (1-y)*numpy.log(1-p)
        llik = sum(logp)
        if update_fn is not None:
            update_fn(i, llik)
        if old_llik is not None:
            # Check to see if the likelihood decreased.  If it did, then
            # restore the old beta parameters and halve the step size.
            if llik < old_llik:
                stepsize = stepsize / 2.0
                beta = old_beta
            # If I've converged, then stop.
            if numpy.fabs(llik-old_llik) <= CONVERGE_THRESHOLD:
                break
        old_llik, old_beta = llik, beta
        i += 1

        W = numpy.identity(N) * p               # Diagonal weight matrix, W = diag(p).
        Xtyp = numpy.dot(Xt, y-p)               # First derivative (gradient).
        XtWX = numpy.dot(numpy.dot(Xt, W), X)   # Second derivative (Hessian approximation).
        delta = numpy.linalg.solve(XtWX, Xtyp)  # Newton step: solve XtWX * delta = Xtyp.
        if numpy.fabs(stepsize-1.0) > 0.001:
            delta = delta * stepsize
        beta = beta + delta                 # Update beta.
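    # Note: this "else" belongs to the "while" loop above.  It runs only
    # when the loop exhausts MAX_ITERATIONS without hitting "break".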
    else:
        raise RuntimeError("Didn't converge.")

    lr = LogisticRegression()
    lr.beta = [float(b) for b in beta]   # Convert back to a plain Python list.
    return lr

def calculate(lr, x):
    """calculate(lr, x) -> list of probabilities

    Calculate the probability for each class.  lr is a
    LogisticRegression object.  x is the observed data (a list of
    values, one per dimension).  Returns a list of two probabilities,
    [P(class 0), P(class 1)].

    """
    # Insert a constant term for x.
    x = numpy.asarray([1.0] + x)
    # Calculate the probability.  p = e^(beta X) / (1+e^(beta X))
    ebetaX = numpy.exp(numpy.dot(lr.beta, x))
    p = ebetaX / (1+ebetaX)
    return [1-p, p]

def classify(lr, x):
    """classify(lr, x) -> 1 or 0

    Classify an observation into a class.

    """
    probs = calculate(lr, x)
    if probs[0] > probs[1]:
        return 0
    return 1
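
For reference, here is a minimal usage sketch of the module above.  The
toy observations, labels, and the show_progress callback are invented
for illustration; only the train/calculate/classify API comes from the
file itself.

#!/usr/bin/env python
from Bio import LogisticRegression

# Tiny invented data set: 2-dimensional observations, class 0 near the
# origin and class 1 further away.
xs = [[1.0, 1.0], [2.0, 1.0], [1.5, 2.0],
      [6.0, 5.0], [7.0, 6.0], [6.5, 7.0]]
ys = [0, 0, 0, 1, 1, 1]

def show_progress(iteration, loglik):
    # Optional callback, invoked once per Newton-Raphson iteration.
    print("Iteration %d: log likelihood %f" % (iteration, loglik))

lr = LogisticRegression.train(xs, ys, update_fn=show_progress)
print(LogisticRegression.calculate(lr, [6.0, 6.0]))   # [P(class 0), P(class 1)]
print(LogisticRegression.classify(lr, [1.0, 1.5]))    # expected: 0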