This file is indexed.

/usr/share/pyshared/Bio/kNN.py is in python-biopython 1.58-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/usr/bin/env python
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""
This module provides code for doing k-nearest-neighbors classification.

k Nearest Neighbors is a supervised learning algorithm that classifies
a new observation based the classes in its surrounding neighborhood.

Glossary:
distance   The distance between two points in the feature space.
weight     The importance given to each point for classification. 


Classes:
kNN           Holds information for a nearest neighbors classifier.


Functions:
train        Train a new kNN classifier.
calculate    Calculate the probabilities of each class, given an observation.
classify     Classify an observation into a class.

    Weighting Functions:
equal_weight    Every example is given a weight of 1.

"""

import numpy

class kNN(object):
    """Holds information necessary to do nearest neighbors classification.

    Members:
    classes  Set of the possible classes.
    xs       List of the neighbors.
    ys       List of the classes that the neighbors belong to.
    k        Number of neighbors to look at.

    """
    def __init__(self):
        """kNN()"""
        self.classes = set()
        self.xs = []
        self.ys = []
        self.k = None

def equal_weight(x, y):
    """equal_weight(x, y) -> 1"""
    # everything gets 1 vote
    return 1

def train(xs, ys, k, typecode=None):
    """train(xs, ys, k) -> kNN
    
    Train a k nearest neighbors classifier on a training set.  xs is a
    list of observations and ys is a list of the class assignments.
    Thus, xs and ys should contain the same number of elements.  k is
    the number of neighbors that should be examined when doing the
    classification.
    
    """
    knn = kNN()
    knn.classes = set(ys)
    knn.xs = numpy.asarray(xs, typecode)
    knn.ys = ys
    knn.k = k
    return knn

def calculate(knn, x, weight_fn=equal_weight, distance_fn=None):
    """calculate(knn, x[, weight_fn][, distance_fn]) -> weight dict

    Calculate the probability for each class.  knn is a kNN object.  x
    is the observed data.  weight_fn is an optional function that
    takes x and a training example, and returns a weight.  distance_fn
    is an optional function that takes two points and returns the
    distance between them.  If distance_fn is None (the default), the
    Euclidean distance is used.  Returns a dictionary of the class to
    the weight given to the class.
    
    """
    x = numpy.asarray(x)

    order = []  # list of (distance, index)
    if distance_fn:
        for i in range(len(knn.xs)):
            dist = distance_fn(x, knn.xs[i])
            order.append((dist, i))
    else:
        # Default: Use a fast implementation of the Euclidean distance
        temp = numpy.zeros(len(x))
        # Predefining temp allows reuse of this array, making this
        # function about twice as fast.
        for i in range(len(knn.xs)):
            temp[:] = x - knn.xs[i]
            dist = numpy.sqrt(numpy.dot(temp,temp))
            order.append((dist, i))
    order.sort()

    # first 'k' are the ones I want.
    weights = {}  # class -> number of votes
    for k in knn.classes:
        weights[k] = 0.0
    for dist, i in order[:knn.k]:
        klass = knn.ys[i]
        weights[klass] = weights[klass] + weight_fn(x, knn.xs[i])

    return weights

def classify(knn, x, weight_fn=equal_weight, distance_fn=None):
    """classify(knn, x[, weight_fn][, distance_fn]) -> class

    Classify an observation into a class.  If not specified, weight_fn will
    give all neighbors equal weight.  distance_fn is an optional function
    that takes two points and returns the distance between them.  If
    distance_fn is None (the default), the Euclidean distance is used.
    """
    weights = calculate(
        knn, x, weight_fn=weight_fn, distance_fn=distance_fn)

    most_class = None
    most_weight = None
    for klass, weight in weights.items():
        if most_class is None or weight > most_weight:
            most_class = klass
            most_weight = weight
    return most_class