This file is indexed.

/usr/lib/python-escript-mpi/esys/downunder/splitminimizers.py is in python-escript-mpi 5.0-3.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
##############################################################################
#
# Copyright (c) 2014-2016 by The University of Queensland
# http://www.uq.edu.au
#
# Primary Business: Queensland, Australia
# Licensed under the Apache License, version 2.0
# http://www.apache.org/licenses/LICENSE-2.0
#
# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
# Development 2012-2013 by School of Earth Sciences
# Development from 2014 by Centre for Geoscience Computing (GeoComp)
#
##############################################################################

from __future__ import print_function, division

import logging

import numpy as np

from esys.escript.pdetools import ArithmeticTuple
from esys.escriptcore.splitworld import Job, FunctionJob

from .minimizers import AbstractMinimizer, MinimizerIterationIncurableBreakDown, MinimizerMaxIterReached
from .splitinversioncostfunctions import SplitInversionCostFunction

class SplitMinimizerLBFGS(AbstractMinimizer):
    """
    Minimizer that uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno
    method.

    This version is modified to work with a split world: the current point,
    the gradient and the L-BFGS history are held in subworld variables
    ("current_point", "g_Jx_0"/"g_Jx_1", "s_and_y", "search_direction", ...)
    instead of being passed around as Python values, and scalar results are
    shipped back to the driver via splitworld float variables.
    """

    # History size: number of (s, y, rho) pairs kept for the two-loop recursion.
    _truncation = 30

    # Initial Hessian multiplier (used only when the cost function does not
    # provide its own inverse Hessian approximation).
    _initial_H = 1

    # Restart (discard the history) after this many iteration steps.
    _restart = 60

    def getOptions(self):
        """Return the current solver options as a dictionary."""
        return {'truncation': self._truncation,
                'initialHessian': self._initial_H,
                'restart': self._restart}

    def setOptions(self, **opts):
        """
        Set solver options.

        Recognized keys are ``truncation`` (alias ``historySize``),
        ``initialHessian`` and ``restart``.

        :raises KeyError: if an unrecognized option name is supplied
        """
        self.logger.debug("Setting options: %s"%(str(opts)))
        for o in opts:
            if o=='historySize' or o=='truncation':
                self._truncation=opts[o]
            elif o=='initialHessian':
                self._initial_H=opts[o]
            elif o=='restart':
                self._restart=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

    @staticmethod
    def move_point_from_base(self, **kwargs):
        """
        Job function which sets
        current_point = base_point + alpha*search_direction   [m = m + p*a].

        Although declared as a ``@staticmethod``, the first argument is the
        splitworld job instance executing this function (hence the name
        ``self``); ``alpha`` arrives through the job's keyword arguments.
        """
        m=self.importValue('base_point')
        p=self.importValue('search_direction')
        a=kwargs['alpha']
        newpoint=m+p*a
        SplitInversionCostFunction.update_point_helper(self, newpoint)

    def run(self):
        """
        Run the L-BFGS iteration.

        This version relies on the cost function already having an initial
        guess loaded.  It also does not return the result: a job needs to be
        submitted to the split world afterwards to extract the solution from
        the "current_point" variable.

        :raises MinimizerMaxIterReached: if the maximum number of iterations
            is reached without convergence
        :raises MinimizerIterationIncurableBreakDown: if two consecutive
            break downs occur
        """

        # First we define our own versions of the helper functions.

        # Updates "g_Jx_0" and "g_Jx_1" to the gradient at the current point,
        # then returns the dual product of search_direction and that gradient
        # (the directional derivative phi'(alpha)).
        # This runs driver-side; it is not a splitworld job function.
        def grad_phi(f, **kwargs):
            f.calculateGradient('g_Jx_0', 'g_Jx_1')

            def dual_p_g_Jx_new(self, **kwargs):
                p=self.importValue("search_direction")
                g_Jx_0=self.importValue("g_Jx_0")
                g_Jx_1=self.importValue("g_Jx_1")
                reg=self.importValue("regularization")
                # again, this assumes that only the regularization term is
                # relevant to the dual product
                res=reg.getDualProduct(p, (g_Jx_0, g_Jx_1))
                self.exportValue("dp_result",res)
            # Run the job on one world only and rely on getFloatVariable to
            # ship the scalar back to the driver.
            f.splitworld.addJob( FunctionJob, dual_p_g_Jx_new)
            f.splitworld.runJobs()
            res=f.splitworld.getFloatVariable("dp_result")
            return res
        #End of grad_phi

        def _zoom(f, alpha_lo, alpha_hi, phi_lo, phi_hi, c1, c2,
                  phi0, gphi0, IMAX=25):
            """
            Helper function for `line_search` below which tries to tighten the range
            alpha_lo...alpha_hi. See Chapter 3 of 'Numerical Optimization' by
            J. Nocedal for an explanation.

            Returns (alpha, phi(alpha), phi'(alpha)); the last entry is None
            if the iteration limit was hit.
            """
            i=0
            while True:
                alpha=alpha_lo+.5*(alpha_hi-alpha_lo) # should use interpolation...
                f.splitworld.addJobPerWorld( FunctionJob, SplitMinimizerLBFGS.move_point_from_base, alpha=alpha, imports=['base_point', 'search_direction'])
                f.splitworld.runJobs()
                f.calculateValue('phi_a')
                phi_a=f.splitworld.getFloatVariable('phi_a')
                if phi_a > phi0+c1*alpha*gphi0 or phi_a >= phi_lo:
                    # sufficient decrease failed -> shrink the interval from above
                    alpha_hi=alpha
                else:
                    gphi_a=grad_phi(f)
                    if np.abs(gphi_a) <= -c2*gphi0:
                        break   # strong Wolfe conditions hold
                    if gphi_a*(alpha_hi-alpha_lo) >= 0:
                        alpha_hi = alpha_lo
                    alpha_lo=alpha
                    phi_lo=phi_a
                i+=1
                if i>IMAX:
                    gphi_a=None
                    break
            return alpha, phi_a, gphi_a
        #End of _zoom

        def line_search(f, alpha=1.0, alpha_truncationax=50.0,
                        c1=1e-4, c2=0.9, IMAX=15):
            """
            Line search method that satisfies the strong Wolfe conditions.
            See Chapter 3 of 'Numerical Optimization' by J. Nocedal for an explanation.

            This version is converted from the line_search from minimizers.py,
            however it takes fewer parameters because some of the values needed
            by the original version are available as subworld variables rather
            than as parameters.

            :param f: the cost function (supplies splitworld and the
                      calculateValue/calculateGradient operations)
            :param alpha: initial step length. If g_Jx is properly scaled alpha=1 is a
                        reasonable starting value.
            :param alpha_truncationax: algorithm terminates if alpha reaches this value
            :param c1: value for Armijo condition (see reference)
            :param c2: value for curvature condition (see reference)
            :param IMAX: maximum number of iterations to perform

            Removed parameters (now in subworld variables instead):
            x    - The start value for line search: in the variable "current_point".
            p    - search direction: in the variable "search_direction"
            g_Jx - value for the gradient of f at x: in the variables "g_Jx_0" and "g_Jx_1"
            Jx   - value of f(x): in the variable "Jx"
            """

            # This will handle the subworld side of the set-up work.
            def line_search_init_worker(self, **kwargs):
                x=self.importValue("current_point")
                p=self.importValue("search_direction")
                g_Jx=ArithmeticTuple(self.importValue("g_Jx_0"), self.importValue("g_Jx_1"))
                Jx=self.importValue("Jx")
                regular=self.importValue("regularization")
                phi0=Jx

                # In the original, this part calls getDualProduct on f.
                # However, since that only ends up referring to the
                # regularisation term, that is called directly here.
                # If your dual product operation requires access to
                # the other models, then this step needs a rethink
                # since not all models are local.
                gphi0=regular.getDualProduct(p, g_Jx)

                old_phi_a=phi0
                phi_a=phi0
                self.exportValue("old_phi_a", old_phi_a)
                self.exportValue("phi_a", phi_a)
                self.exportValue("gphi0", gphi0)
                self.exportValue("phi0", phi0)
                self.exportValue("base_point", x)       # To ensure we can revert if needed
            #End of line_search_init_worker

            old_alpha=0
            i=1
            f.splitworld.addJobPerWorld( FunctionJob, line_search_init_worker, imports=['search_direction', 'g_Jx_0', 'g_Jx_1', 'Jx', 'regularization'])
            f.splitworld.runJobs()

            # NOTE(review): the original defined a second, byte-identical copy
            # of grad_phi here; the enclosing definition above (which _zoom
            # already closes over) is used instead.

            while i<IMAX and alpha>0. and alpha<alpha_truncationax:
                alpha_at_loop_start=alpha
                # NOTE(review): move_point_from_base reads 'base_point', but this
                # job declares 'current_point' in its imports (the _zoom call above
                # declares 'base_point') - confirm against splitworld semantics.
                f.splitworld.addJobPerWorld( FunctionJob, SplitMinimizerLBFGS.move_point_from_base, alpha=alpha, imports=['current_point', 'search_direction'])
                f.splitworld.runJobs()
                f.calculateValue('phi_a')
                phi_a=f.splitworld.getFloatVariable('phi_a')
                phi0=f.splitworld.getFloatVariable('phi0')
                gphi0=f.splitworld.getFloatVariable('gphi0')
                # NOTE(review): this re-reads the value exported once by the init
                # worker on every pass, so the "old_phi_a=phi_a" update at the
                # bottom of the loop is overwritten - confirm this is intended.
                old_phi_a=f.splitworld.getFloatVariable('old_phi_a')
                if (phi_a > phi0+c1*alpha*gphi0) or ((phi_a>=old_phi_a) and (i>1)):
                    alpha, phi_a, gphi_a = _zoom(f, old_alpha, alpha, old_phi_a, phi_a, c1, c2, phi0, gphi0)
                    break

                # Need to check if alpha has changed. If it has, we need to
                # move the point again.
                if alpha_at_loop_start!=alpha:
                    f.splitworld.addJobPerWorld( FunctionJob, SplitMinimizerLBFGS.move_point_from_base, alpha=alpha, imports=['current_point', 'search_direction'])
                    f.splitworld.runJobs()

                gphi_a=grad_phi(f)
                if np.abs(gphi_a) <= -c2*gphi0:
                    break
                if gphi_a >= 0:
                    # BUGFIX: this used to call _zoom(phi, gradphi, phiargs, ...)
                    # with names that do not exist in this scope (a leftover from
                    # the non-split minimizers.py signature). Call the local _zoom
                    # with the interval reversed, as in minimizers.py.
                    alpha, phi_a, gphi_a = _zoom(f, alpha, old_alpha, phi_a, old_phi_a, c1, c2, phi0, gphi0)
                    break

                old_alpha=alpha
                # the factor is arbitrary as long as there is sufficient increase
                alpha=2.*alpha
                old_phi_a=phi_a
                i+=1
            return alpha, phi_a
        #End of line_search

        splitworld=self.getCostFunction().splitworld
        if self.getCostFunction().provides_inverse_Hessian_approximation:
            self.getCostFunction().updateHessian()
            invH_scale = None
        else:
            invH_scale = self._initial_H

        # start the iteration:
        n_iter = 0
        n_last_break_down=-1
        non_curable_break_down = False
        converged = False

        # Set point to initial guess value (takes the place of a getArgs call).
        self.getCostFunction().setPoint()
        # Evaluate the function, store the result in "Jx" and keep a copy of
        # the initial value for the J_tol convergence test.
        self.getCostFunction().calculateValue("Jx")
        splitworld.copyVariable("Jx", "Jx_original")
        # Compute the gradient and store the result.
        self.getCostFunction().calculateGradient("g_Jx_0","g_Jx_1")

        while not converged and not non_curable_break_down and n_iter < self._imax:
            k=0
            break_down = False
            reset_s_and_y = True
            # initial step length for line search
            alpha=1.0

            while not converged and not break_down and k < self._restart and n_iter < self._imax:
                self.logger.info("********** iteration %3d **********"%n_iter)
                if invH_scale:
                    self.logger.debug("\tH = %s"%invH_scale)

                # remember the previous gradient and cost value
                splitworld.copyVariable("g_Jx_0", "old_g_Jx_0")
                splitworld.copyVariable("g_Jx_1", "old_g_Jx_1")
                splitworld.copyVariable("Jx", "Jx_old")

                # determine search direction (stored in "search_direction")
                self._twoLoop(self.getCostFunction().splitworld, reset_s_and_y)
                reset_s_and_y = False

                # determine new step length using the last one as initial value
                # however, avoid using too small steps for too long.
                # FIXME: This is a bit specific to esys.downunder in that the
                # inverse Hessian approximation is not scaled properly (only
                # the regularization term is used at the moment)...
                if invH_scale is None:
                    if alpha <= 0.5:
                        alpha=2*alpha
                else:
                    # reset alpha for the case that the cost function does not
                    # provide an approximation of inverse H
                    alpha=1.0
                # Returns a scaling alpha for the search direction; the new
                # point x_new = x + alpha*p has already been set by line_search.
                alpha, phi_a = line_search(self.getCostFunction(), alpha)
                self.logger.debug("\tSearch direction scaling alpha=%e"%alpha)

                Jx=splitworld.getFloatVariable("Jx")
                converged = True
                if self._J_tol:
                    # relative-change-in-J convergence test
                    Jx_old=splitworld.getFloatVariable("Jx_old")
                    Jx_original=splitworld.getFloatVariable("Jx_original")
                    flag=abs(Jx-Jx_old) <= self._J_tol * abs(Jx-Jx_original)
                    if self.logger.isEnabledFor(logging.DEBUG):
                        if flag:
                            self.logger.debug("Cost function has converged: dJ, J*J_tol = %e, %e"%(Jx_old-Jx,abs(Jx-Jx_original)*self._J_tol))
                        else:
                            self.logger.debug("Cost function checked: dJ, J*J_tol = %e, %e"%(Jx_old-Jx,abs(Jx)*self._J_tol))
                    converged = converged and flag
                if self._m_tol:
                    # step-size convergence test ||alpha*p|| <= m_tol*||x||,
                    # evaluated on the subworlds
                    def converged_check(self, **kwargs):
                        alpha=kwargs["alpha"]
                        m_tol=kwargs["m_tol"]
                        reg=self.importValue("regularization")
                        p=self.importValue("search_direction")
                        delta_x=alpha*p
                        x=self.importValue("current_point")
                        norm_x = reg.getNorm(x)
                        norm_dx = reg.getNorm(delta_x)
                        flag = norm_dx <= m_tol * norm_x
                        self.exportValue('conv_flag', flag)
                    # End of converged_check
                    self.getCostFunction().splitworld.addJobPerWorld(FunctionJob, converged_check, alpha=alpha, m_tol=self._m_tol, imports=["regularization", "search_direction", "current_point"])
                    self.getCostFunction().splitworld.runJobs()
                    flag=self.getCostFunction().splitworld.getFloatVariable("conv_flag")>0.001
                    converged = converged and flag

                # the point update itself was already done by line_search
                if converged:
                    self.logger.info("\tJ(x) = %s"%Jx)
                    break

                # Not converged: update the L-BFGS history (s, y, rho).
                def run_worker(self, **kwargs):
                    break_down=False
                    need_trunc=kwargs["need_trunc"]
                    alpha=kwargs["alpha"]
                    g_Jx=ArithmeticTuple(self.importValue("g_Jx_0"), self.importValue("g_Jx_1"))
                    old_g_Jx=ArithmeticTuple(self.importValue("old_g_Jx_0"), self.importValue("old_g_Jx_1"))
                    p=self.importValue("search_direction")
                    reg=self.importValue("regularization")
                    s_and_y=self.importValue("s_and_y")

                    delta_g=g_Jx-old_g_Jx
                    delta_x=alpha*p
                    rho=reg.getDualProduct(delta_x, delta_g)
                    if abs(rho)>0:
                        s_and_y.append((delta_x,delta_g, rho ))
                    else:
                        # zero curvature -> break down of the update
                        break_down=True
                    if need_trunc:
                        # delete the oldest vector pair
                        s_and_y.pop(0)
                    self.exportValue("break_down", break_down)
                    self.exportValue("s_and_y",s_and_y)
                # End run_worker

                # Only one world has s_and_y - so it is important that only
                # single jobs are run that manipulate s_and_y.
                self.getCostFunction().splitworld.addJob( FunctionJob, run_worker, alpha=alpha, need_trunc=(k>=self._truncation))
                self.getCostFunction().splitworld.runJobs()

                break_down=(splitworld.getLocalObjectVariable("break_down")>0.001)
                self.getCostFunction().updateHessian()

                k+=1
                n_iter+=1

                if not self.getCostFunction().provides_inverse_Hessian_approximation and not break_down:
                    # set the new scaling factor (approximation of inverse Hessian)
                    # NOTE(review): delta_g/delta_x only exist inside run_worker on
                    # the subworld, so this branch would raise NameError if taken;
                    # the class currently appears to assume
                    # provides_inverse_Hessian_approximation is True (see _twoLoop
                    # docstring) - needs a proper fix if that assumption is lifted.
                    denom=self.getCostFunction().getDualProduct(delta_g, delta_g)
                    if denom > 0:
                        invH_scale=self.getCostFunction().getDualProduct(delta_x,delta_g)/denom
                    else:
                        invH_scale=self._initial_H
                        self.logger.debug("** Break down in H update. Resetting to initial value %s."%self._initial_H)

            # case handling for inner iteration:
            if break_down:
                if n_iter == n_last_break_down+1:
                    non_curable_break_down = True
                    self.logger.debug("** Incurable break down detected in step %d."%n_iter)
                else:
                    n_last_break_down = n_iter
                    self.logger.debug("** Break down detected in step %d. Iteration is restarted."%n_iter)
            if not k < self._restart:
                self.logger.debug("Iteration is restarted after %d steps."%n_iter)

        # This version does not return the result; submit a job to extract
        # "current_point" from the split world instead.
        self._result=None
        if n_iter >= self._imax:
            self.logger.warn(">>>>>>>>>> Maximum number of iterations reached! <<<<<<<<<<")
            raise MinimizerMaxIterReached("Gave up after %d steps."%n_iter)
        elif non_curable_break_down:
            self.logger.warn(">>>>>>>>>> Incurable breakdown! <<<<<<<<<<")
            raise MinimizerIterationIncurableBreakDown("Gave up after %d steps."%n_iter)

        self.logger.info("Success after %d iterations!"%n_iter)

    def _twoLoop(self, splitworld, reset):
        """
        Helper for the L-BFGS method performing the two-loop recursion.
        See 'Numerical Optimization' by J. Nocedal for an explanation.

        This has been converted to use splitworld.  As such it doesn't
        return a result; instead it stores the "p" result in the
        "search_direction" variable.

        Depends on the following splitworld variables:
          g_Jx_0, g_Jx_1
          current_point
          s_and_y

        Other conversion notes:
          The current cost function's inverseHessianApproximation and
          dualProduct only depend on the regularization term so this
          function can be done entirely by one world.  If the Hessian/dp
          ever needed other worlds, this will need to be reworked.
          Implicit in the above is that the overall cost function
          has cf.provides_inverse_Hessian_approximation==True.
        """
        # Function to push the work into a subworld.
        def two_loop_worker(self, **kwargs):
            reset=kwargs['reset']
            # 'current_point' is declared in the job imports; its value is not
            # used directly in the recursion below.
            x=self.importValue("current_point")
            reg=self.importValue("regularization")
            g_Jx=ArithmeticTuple(self.importValue("g_Jx_0"), self.importValue("g_Jx_1"))
            if reset:
                s_and_y = []
                self.exportValue('s_and_y', list())
            else:
                s_and_y=self.importValue("s_and_y")

            # first loop: walk the history backwards, accumulating the alphas
            q=g_Jx
            alpha=[]
            for s,y, rho in reversed(s_and_y):
                a=reg.getDualProduct(s, q)/rho
                alpha.append(a)
                q=q-a*y

            # apply the (approximate) inverse Hessian
            r=reg.getInverseHessianApproximationAtPoint(q)

            # second loop: walk forwards, consuming the stored alphas
            for s,y,rho in s_and_y:
                beta = reg.getDualProduct(r, y)/rho
                a = alpha.pop()
                r = r + s * (a-beta)
            # In the original version, the caller negated the result
            self.exportValue("search_direction", -r)
        # Note: this is only running on one world (of possibly many).
        # Any jobs which also need the variables exported here
        # need to be shared or the jobs need to run on the same world.
        splitworld.addJob(FunctionJob, two_loop_worker, reset=reset, imports=["current_point", "regularization", "g_Jx_0", "g_Jx_1"])
        splitworld.runJobs()