This file is indexed.

/usr/lib/python2.7/dist-packages/pebl/test/test_cpd.py is in python-pebl 1.0.2-4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
from numpy import array, allclose
from pebl import data, cpd
from pebl.test import testfile

def test_cextension():
    # Check that the compiled _cpd extension module can be imported.
    #
    # Only ImportError is translated into a test failure: a bare "except:"
    # would also swallow KeyboardInterrupt/SystemExit, and "assert False"
    # is stripped under "python -O", which would make this test pass even
    # when the extension is missing.
    try:
        from pebl import _cpd
    except ImportError as exc:
        raise AssertionError(
            "pebl._cpd C extension could not be imported: %s" % exc
        )

class TestCPD_Py:
    """
    Deriving loglikelihood manually
    -------------------------------

    Below is the derived calculation for the loglikelihood of the parentset for
    node 0.  Calculation done according to the g function from Cooper and
    Herskovits. This test is done with binary variables because doing more on
    paper is tedious. There are other tests that check for correct
    loglikelihood with more complicated data.

    data: 0110   parentset: {1,2,3} --> {0}
          1001
          1110
          1110
          0011

    ri = child.arity = 2

    parent config - (Nij+ri-1)!   -   Pi[Nijk!]
    -------------------------------------------
    000             (0+2-1)!           0!0!
    001             (1+2-1)!           0!1!
    010             (0+2-1)!           0!0!
    011             (1+2-1)!           1!0!
    100             (0+2-1)!           0!0!
    101             (0+2-1)!           0!0!
    110             (3+2-1)!           1!2!
    111             (0+2-1)!           0!0!

    likelihood  = Pi[ [(ri-1)!/(Nij+ri-1)!] x Pi[Nijk!] ]
                = 1!0!0!/1! x 1!0!1!/2! x 1!0!0!/1! x
                  1!1!0!/2! x 1!1!2!/4! x 1!0!0!/1!
                  (every configuration with Nij = 0 contributes a factor of 1,
                   so not all of them are written out)

                = 1         x 1/2       x 1 x
                  1/2       x 1/12      x 1

                = 1/48

    loglikelihood = ln(1/48) = -3.87120101091
    """

    # Implementation under test; subclasses override this to run the same
    # tests against another implementation.
    cpdtype = cpd.MultinomialCPD_Py

    def setUp(self):
        # Five observations over four binary variables (see class docstring).
        self.data = data.Dataset(array([[0, 1, 1, 0],
                                        [1, 0, 0, 1],
                                        [1, 1, 1, 0],
                                        [1, 1, 1, 0],
                                        [0, 0, 1, 1]]))
        for v in self.data.variables:
            v.arity = 2
        self.cpd = self.cpdtype(self.data)

    def test_lnfactorial_cache(self):
        # Expected values are ln(n!) for n = 0..16.
        expected = array([  0.        ,   0.        ,   0.69314718,   1.79175947,
                            3.17805383,   4.78749174,   6.57925121,   8.52516136,
                            10.6046029,  12.80182748,  15.10441257,  17.50230785,
                            19.9872145,  22.55216385,  25.19122118,  27.89927138,
                            30.67186011])
        assert allclose(self.cpd.lnfactorial_cache, expected)

    def test_offsets(self):
        assert (self.cpd.offsets == array([0,1,2,4])).all()

    def test_counts(self):
        # One row per parent configuration.  The columns look like the
        # per-child-value counts followed by the row total (compare row 3
        # with the "110" line of the table in the class docstring).
        expected = array([[0, 0, 0],
                          [0, 0, 0],
                          [0, 0, 0],
                          [1, 2, 3],
                          [0, 1, 1],
                          [0, 0, 0],
                          [1, 0, 1],
                          [0, 0, 0]])
        assert (self.cpd.counts == expected).all()

    def test_loglikelihood(self):
        # Baseline value derived by hand in the class docstring: ln(1/48).
        # This method was previously named "loglikelihood", which the test
        # runner does not collect, so the check never ran.
        assert allclose(self.cpd.loglikelihood(), -3.87120101091)

    def test_replace1_loglikelihood(self):
        # Do a noop replace.
        self.cpd.replace_data(array([0,1,1,0]), array([0,1,1,0]))
        assert allclose(self.cpd.loglikelihood(), -3.87120101091)

    def test_replace1_counts(self):
        # A noop replace must leave the counts untouched.
        self.cpd.replace_data(array([0,1,1,0]), array([0,1,1,0]))
        expected = array([[0, 0, 0],
                          [0, 0, 0],
                          [0, 0, 0],
                          [1, 2, 3],
                          [0, 1, 1],
                          [0, 0, 0],
                          [1, 0, 1],
                          [0, 0, 0]])
        assert (self.cpd.counts == expected).all()

    def test_replace2_loglikelihood(self):
        # Replace the first observation [0,1,1,0] with [1,1,1,0].
        # Likelihood becomes 1/2 x 1/2 x (1!0!3!/4!) = 1/16, ln = -2.7725887...
        self.cpd.replace_data(self.data.observations[0], array([1,1,1,0]))
        assert allclose(self.cpd.loglikelihood(), -2.77258872224)

    def test_replace2_counts(self):
        # Same replacement as above: row 3 moves from [1, 2, 3] to [0, 3, 3].
        self.cpd.replace_data(self.data.observations[0], array([1,1,1,0]))
        expected = array([[0, 0, 0],
                          [0, 0, 0],
                          [0, 0, 0],
                          [0, 3, 3],
                          [0, 1, 1],
                          [0, 0, 0],
                          [1, 0, 1],
                          [0, 0, 0]])
        assert (self.cpd.counts == expected).all()

    def test_undo_loglikelihood(self):
        # Replace an observation and then swap it back: the loglikelihood
        # must return to the baseline value.
        self.cpd.replace_data(self.data.observations[0], array([1,1,1,0]))
        self.cpd.replace_data(array([1,1,1,0]),array([0,1,1,0]))
        assert allclose(self.cpd.loglikelihood(), -3.87120101091)

    def test_undo_counts(self):
        # Replace an observation and then swap it back: the counts must
        # return to their initial state.
        self.cpd.replace_data(self.data.observations[0], array([1,1,1,0]))
        self.cpd.replace_data(array([1,1,1,0]),array([0,1,1,0]))
        expected = array([[0, 0, 0],
                          [0, 0, 0],
                          [0, 0, 0],
                          [1, 2, 3],
                          [0, 1, 1],
                          [0, 0, 0],
                          [1, 0, 1],
                          [0, 0, 0]])
        assert (self.cpd.counts == expected).all()

    def test_replace_with_ndarray(self):
        # Same replacement as test_replace2_loglikelihood, but the old
        # observation is passed as a freshly built ndarray instead of a row
        # taken from the dataset.
        self.cpd.replace_data(array([0,1,1,0]), array([1,1,1,0]))
        assert allclose(self.cpd.loglikelihood(), -2.77258872224)


class TestCPD_C(TestCPD_Py):
    """Run the TestCPD_Py checks against the C implementation."""

    cpdtype = cpd.MultinomialCPD_C

    # The C version doesn't expose all datastructures to python, so the
    # inherited tests that inspect those internals are turned into no-ops.

    def test_lnfactorial_cache(self):
        pass

    def test_offsets(self):
        pass

    def test_counts(self):
        pass

    def test_replace1_counts(self):
        pass

    def test_replace2_counts(self):
        pass

    def test_undo_counts(self):
        pass

class TestCPD2_Py:
    """
    Can we properly handle nodes with no parents?
    ----------------------------------------------

    With data=[1,0,1,1,0] for a node with no parents:

        ri = child.arity = 2

        parent config   (Nij+ri-1)!       Pi[Nijk!]
        -------------------------------------------
        null set        (5+2-1)!          3!2!

        likelihood = Pi[ [(ri-1)!/(Nij+ri-1)!] x Pi[Nijk!] ]
                   = 1!3!2!/6!
                   = 12/720 = 1/60

        loglikelihood = ln(1/60)
                      = -4.09434456
    """

    # Implementation under test; subclasses swap this out.
    cpdtype = cpd.MultinomialCPD_Py

    def setUp(self):
        # A single binary variable observed five times (one column, five rows).
        observations = array([[1],
                              [0],
                              [1],
                              [1],
                              [0]])
        self.data = data.Dataset(observations)
        only_variable = self.data.variables[0]
        only_variable.arity = 2
        self.cpd = self.cpdtype(self.data)

    def test_offsets(self):
        expected_offsets = array([0])
        assert (self.cpd.offsets == expected_offsets).all()

    def test_counts(self):
        # Two 0s, three 1s, five observations in total.
        expected_counts = array([[2, 3, 5]])
        assert (self.cpd.counts == expected_counts).all()

    def test_loglikelihood(self):
        # ln(1/60), as derived in the class docstring.
        score = self.cpd.loglikelihood()
        assert allclose(score, -4.09434456)

class TestCPD2_C(TestCPD2_Py):
    """Run the TestCPD2_Py checks against the C implementation."""

    cpdtype = cpd.MultinomialCPD_C

    # The C version doesn't expose all datastructures to python, so the
    # inherited tests that inspect those internals are turned into no-ops.

    def test_offsets(self):
        pass

    def test_counts(self):
        pass


class TestMultinomialCPD_C:
    # Stress tests that repeatedly create and delete MultinomialCPD_C objects.
    # They make no assertions: a test passes if the interpreter survives the
    # loop.  The notes about memory being reused or freed are the original
    # author's description of the _cpd extension; that behaviour is not
    # visible from this file.
    def setUp(self):
        # Load the "greedytest1-200.txt" fixture through the test-suite helper.
        self.data = data.fromfile(testfile("greedytest1-200.txt"))

    def test_cpt_reuse(self):
        # check that we don't have SegFault or BusError
        
        # instead of freeing memory, _cpd will reuse it
        # Only one CPD object is alive at a time: each is created, scored and
        # explicitly deleted before the next iteration creates a new one.
        for i in xrange(10000):
            c = cpd.MultinomialCPD_C(self.data)
            c.loglikelihood()
            del c

    def test_cpt_ceate_delete(self):
        # "del c" will reuse memory while "del c2" will free it
        # Two CPD objects are alive at once here; the order of the two "del"
        # statements is deliberate, so keep it when editing.
        for i in xrange(10000):
            c = cpd.MultinomialCPD_C(self.data)
            c2 = cpd.MultinomialCPD_C(self.data)
            c.loglikelihood()
            c2.loglikelihood()
            del c
            del c2