This file is indexed.

/usr/lib/python2.7/dist-packages/pebl/learner/greedy.py is in python-pebl 1.0.2-4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
"""Learner that implements a greedy learning algorithm"""

import time

from pebl import network, result, evaluator
from pebl.util import *
from pebl.learner.base import *

class GreedyLearnerStatistics(object):
    """Counters and timing information for one greedy learner run.

    Derives from ``object`` so that the ``runtime`` property is backed by a
    new-style class; on a classic Python 2 class, assigning to ``runtime``
    would silently shadow the property instead of raising AttributeError.
    """

    def __init__(self):
        # Starts at -1 because GreedyLearner._run_without_restarts increments
        # it at the beginning of every pass, so the first pass brings it to 0.
        self.restarts = -1
        # Total number of alteration attempts made over the whole run.
        self.iterations = 0
        # Consecutive iterations in the current pass that did not improve the
        # best score.
        self.unimproved_iterations = 0
        # Best score seen in the current pass.
        self.best_score = 0
        # Wall-clock timestamp (seconds since the epoch) taken at creation.
        self.start_time = time.time()

    @property
    def runtime(self):
        """Return the number of seconds elapsed since this object was created."""
        return time.time() - self.start_time

class GreedyLearner(Learner):
    """Learner that searches for high-scoring networks greedily.

    Each pass repeatedly alters the current network, keeps the alteration
    only when the score strictly improves, and ends when either the restart
    criterion or the stopping criterion is met.
    """

    #
    # Parameters
    #
    _params = (
        config.IntParameter(
            'greedy.max_iterations',
            """Maximum number of iterations to run.""",
            default=1000
        ),
        config.IntParameter(
            'greedy.max_time',
            """Maximum learner runtime in seconds.""",
            default=0
        ),
        config.IntParameter(
            'greedy.max_unimproved_iterations',
            """Maximum number of iterations without score improvement before
            a restart.""",
            default=500
        ),
        config.StringParameter(
            'greedy.seed',
            'Starting network for a greedy search.',
            default=''
        ),
    )

    def __init__(self, data_=None, prior_=None, **options):
        """
        Create a learner that uses a greedy learning algorithm.

        Outline of the search as implemented by run():

            1. The first pass starts from the seed network; every later pass
               starts from a network randomized by the evaluator.
            2. A small, local change is made and the network is rescored.
            3. The change is kept if the score improved and undone otherwise.
            4. Steps 2-3 repeat until the restart criterion is met, at which
               point a new pass begins (step 1).

        Any config param for 'greedy' can be passed in via options.
        Use just the option part of the parameter name (for example
        max_iterations rather than greedy.max_iterations).

        Background reading on greedy learning algorithms:

            1. http://en.wikipedia.org/wiki/Greedy_algorithm
            2. D. Heckerman. A Tutorial on Learning with Bayesian Networks.
               Microsoft Technical Report MSR-TR-95-06, 1995. p.35.

        """

        super(GreedyLearner, self).__init__(data_, prior_)

        # The raw keyword options are kept because run() inspects them when
        # choosing a stopping criterion.
        self.options = options

        # Presumably populates self.max_iterations, self.max_time,
        # self.max_unimproved_iterations and self.seed from the declared
        # parameters and the given options -- confirm against pebl.config.
        config.setparams(self, options)

        # Anything that is not already a Network is handed to the Network
        # constructor together with the data's variables.
        seed_spec = self.seed
        if not isinstance(seed_spec, network.Network):
            self.seed = network.Network(self.data.variables, seed_spec)

    def run(self):
        """Run the learner.

        Returns a LearnerResult instance, which is also stored as
        self.result.

        """

        # The two stopping criteria are mutually exclusive.  The time limit
        # is chosen only when 'max_time' was given as a keyword option to the
        # constructor; otherwise the iteration limit is used.
        # NOTE(review): a max_time that did not arrive through options is not
        # consulted here -- confirm this is intended.
        if 'max_time' in self.options:
            _stop = self._stop_after_time
        else:
            _stop = self._stop_after_iterations

        self.stats = GreedyLearnerStatistics()
        self.result = result.LearnerResult(self)
        self.evaluator = evaluator.fromconfig(self.data, self.seed, self.prior)
        self.evaluator.score_network(self.seed.copy())

        self.result.start_run()

        # The very first pass keeps the seed network; all later ones ask for
        # a randomized network.
        randomize = False
        while not _stop():
            self._run_without_restarts(_stop, self._restart,
                                       randomize_net=randomize)
            randomize = True

        self.result.stop_run()

        return self.result

    def _run_without_restarts(self, _stop, _restart, randomize_net=True):
        """Perform one pass of the search, ending on a restart or a stop.

        _stop and _restart are zero-argument callables returning a truth
        value.  When randomize_net is true, the evaluator's network is
        randomized before the pass begins.  The pass also ends early if the
        network cannot be altered.
        """
        self.stats.restarts += 1
        self.stats.unimproved_iterations = 0

        if randomize_net:
            self.evaluator.randomize_network()

        # The score of the starting network is the baseline to beat.
        self.stats.best_score = self.evaluator.score_network()

        # _restart is evaluated first; _stop is only consulted when no
        # restart is due.
        while not _restart() and not _stop():
            self.stats.iterations += 1

            try:
                candidate_score = self._alter_network_randomly_and_score()
            except CannotAlterNetworkException:
                return

            # Every candidate is recorded, whether or not it ends up accepted.
            self.result.add_network(self.evaluator.network, candidate_score)

            if candidate_score <= self.stats.best_score:
                # No improvement: count it and undo the alteration.
                self.stats.unimproved_iterations += 1
                self.evaluator.restore_network()
                continue

            # Strict improvement: keep the alteration and reset the counter.
            self.stats.best_score = candidate_score
            self.stats.unimproved_iterations = 0

    #
    # Stopping and restarting criteria
    #
    def _stop_after_time(self):
        """Return True once the run has lasted at least max_time seconds."""
        elapsed = self.stats.runtime
        return elapsed >= self.max_time

    def _stop_after_iterations(self):
        """Return True once at least max_iterations iterations were made."""
        completed = self.stats.iterations
        return completed >= self.max_iterations

    def _restart(self):
        """Return True once max_unimproved_iterations iterations in a row
        have failed to improve the score."""
        stalled = self.stats.unimproved_iterations
        return stalled >= self.max_unimproved_iterations