This file is indexed.

/usr/share/pyshared/mvpa/mappers/base.py is in python-mvpa 0.4.8-3.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Data mapper"""

__docformat__ = 'restructuredtext'

import numpy as N

from mvpa.mappers.metric import Metric

from mvpa.datasets import Dataset
from mvpa.misc.vproperty import VProperty
from mvpa.base.dochelpers import enhancedDocString

if __debug__:
    from mvpa.base import warning
    from mvpa.base import debug


class Mapper(object):
    """Interface to provide mapping between two spaces: IN and OUT.
    Methods are prefixed correspondingly. forward/reverse operate
    on the entire dataset. get(In|Out)Id[s] operate per element::

              forward
             --------->
         IN              OUT
             <--------/
               reverse
    """
    def __init__(self, metric=None):
        """
        :Parameters:
          metric : Metric
            Optional metric
        """
        self.__metric = None
        """Pylint happiness"""
        self.setMetric(metric)
        """Actually assign the metric"""

    #
    # The following methods are abstract and merely define the intended
    # interface of a mapper and have to be implemented in derived classes. See
    # the docstrings of the respective methods for details about what they
    # should do.
    #

    def forward(self, data):
        """Map data from the IN dataspace into OUT space.
        """
        raise NotImplementedError


    def reverse(self, data):
        """Reverse map data from OUT space into the IN space.
        """
        raise NotImplementedError


    def getInSize(self):
        """Returns the size of the entity in input space"""
        raise NotImplementedError


    def getOutSize(self):
        """Returns the size of the entity in output space"""
        raise NotImplementedError


    def selectOut(self, outIds):
        """Limit the OUT space to a certain set of features.

        :Parameters:
          outIds: sequence
            Subset of ids of the current feature in OUT space to keep.
        """
        raise NotImplementedError


    def getInId(self, outId):
        """Translate a feature id into a coordinate/index in input space.

        Such a translation might not be meaningful or even possible for a
        particular mapping algorithm and therefore cannot be relied upon.
        """
        raise NotImplementedError


    #
    # The following methods are candidates for reimplementation in derived
    # classes, in cases where the provided default behavior is not appropriate.
    #
    def isValidOutId(self, outId):
        """Validate feature id in OUT space.

        Override if OUT space is not simly a 1D vector
        """
        return(outId >= 0 and outId < self.getOutSize())


    def isValidInId(self, inId):
        """Validate id in IN space.

        Override if IN space is not simly a 1D vector
        """
        return(inId >= 0 and inId < self.getInSize())


    def train(self, dataset):
        """Perform training of the mapper.

        This method is called to put the mapper in a state that allows it to
        perform to intended mapping.

        :Parameter:
          dataset: Dataset or subclass

        .. note::
          The default behavior of this method is to do nothing.
        """
        pass


    def getNeighbor(self, outId, *args, **kwargs):
        """Get feature neighbors in input space, given an id in output space.

        This method has to be reimplemented whenever a derived class does not
        provide an implementation for :meth:`~mvpa.mappers.base.Mapper.getInId`.
        """
        if self.metric is None:
            raise RuntimeError, "No metric was assigned to %s, thus no " \
                  "neighboring information is present" % self

        if self.isValidOutId(outId):
            inId = self.getInId(outId)
            for inId in self.getNeighborIn(inId, *args, **kwargs):
                yield self.getOutId(inId)


    #
    # The following methods provide common functionality for all mappers
    # and there should be no immediate need to reimplement them
    #
    def getNeighborIn(self, inId, *args, **kwargs):
        """Return the list of coordinates for the neighbors.

        :Parameters:
          inId
            id (index) of an element in input dataspace.
          *args, **kwargs
            Any additional arguments are passed to the embedded metric of the
            mapper.

        XXX See TODO below: what to return -- list of arrays or list
        of tuples?
        """
        if self.metric is None:
            raise RuntimeError, "No metric was assigned to %s, thus no " \
                  "neighboring information is present" % self

        isValidInId = self.isValidInId
        if isValidInId(inId):
            for neighbor in self.metric.getNeighbor(inId, *args, **kwargs):
                if isValidInId(neighbor):
                    yield neighbor


    def getNeighbors(self, outId, *args, **kwargs):
        """Return the list of coordinates for the neighbors.

        By default it simply constructs the list based on
        the generator returned by getNeighbor()
        """
        return [ x for x in self.getNeighbor(outId, *args, **kwargs) ]


    def __repr__(self):
        if self.__metric is not None:
            s = "metric=%s" % repr(self.__metric)
        else:
            s = ''
        return "%s(%s)" % (self.__class__.__name__, s)


    def __call__(self, data):
        """Calls the mappers forward() method.
        """
        return self.forward(data)


    def getMetric(self):
        """To make pylint happy"""
        return self.__metric


    def setMetric(self, metric):
        """To make pylint happy"""
        if metric is not None and not isinstance(metric, Metric):
            raise ValueError, "metric for Mapper must be an " \
                              "instance of a Metric class . Got %s" \
                                % `type(metric)`
        self.__metric = metric


    metric = property(fget=getMetric, fset=setMetric)
    nfeatures = VProperty(fget=getOutSize)



class ProjectionMapper(Mapper):
    """Linear mapping between multidimensional spaces.

    This class cannot be used directly. Sub-classes have to implement
    the `_train()` method, which has to compute the projection matrix
    `_proj` and optionally offset vectors `_offset_in` and
    `_offset_out` (if initialized with demean=True, which is default)
    given a dataset (see `_train()` docstring for more information).

    Once the projection matrix is available, this class provides
    functionality to perform forward and backwards linear mapping of
    data, the latter by default using pseudo-inverse (but could be
    altered in subclasses, like hermitian (conjugate) transpose in
    case of SVD).  Additionally, `ProjectionMapper` supports optional
    selection of arbitrary component (i.e. columns of the projection
    matrix) of the projection.

    Forward and back-projection matrices (a.k.a. *projection* and
    *reconstruction*) are available via the `proj` and `recon`
    properties.
    """

    _DEV__doc__ = """Think about renaming `demean`, may be `translation`?"""

    def __init__(self, selector=None, demean=True):
        """Initialize the ProjectionMapper

        :Parameters:
          selector: None | list
            Which components (i.e. columns of the projection matrix)
            should be used for mapping. If `selector` is `None` all
            components are used. If a list is provided, all list
            elements are treated as component ids and the respective
            components are selected (all others are discarded).
          demean: bool
            Either data should be demeaned while computing
            projections and applied back while doing reverse()
        """
        Mapper.__init__(self)

        self._selector = selector
        self._proj = None
        """Forward projection matrix."""
        self._recon = None
        """Reverse projection (reconstruction) matrix."""
        self._demean = demean
        """Flag whether to demean the to be projected data, prior to projection.
        """
        self._offset_in = None
        """Offset (most often just mean) in the input space"""
        self._offset_out = None
        """Offset (most often just mean) in the output space"""

    __doc__ = enhancedDocString('ProjectionMapper', locals(), Mapper)


    def train(self, dataset, *args, **kwargs):
        """Determine the projection matrix.

        :Parameters:
          dataset : Dataset
            Dataset to operate on
          *args
            Optional positional arguments to pass to _train
            of subclass
          **kwargs
            Optional keyword arguments to pass to _train
            of subclass
        """
        # store the feature wise mean
        if isinstance(dataset, Dataset):
            samples = dataset.samples
        else:
            samples = dataset
        self._offset_in = samples.mean(axis=0)
        # ??? Setting of _offset_out is to be done in a child
        # class

        # compute projection matrix with subclass logic
        self._train(dataset, *args, **kwargs)

        # perform component selection
        if self._selector is not None:
            self.selectOut(self._selector)


    def _demeanData(self, data):
        """Helper which optionally demeans
        """
        if self._demean:
            # demean the training data
            data = data - self._offset_in

            if __debug__ and "MAP_" in debug.active:
                debug("MAP_",
                      "%s: Mean of data in input space %s was subtracted" %
                      (self.__class__.__name__, self._offset_in))
        return data


    def _train(self, dataset):
        """Worker method. Needs to be implemented by subclass.

        This method has to train the mapper and store the resulting
        transformation matrix in `self._proj`.
        """
        raise NotImplementedError


    def forward(self, data, demean=None):
        """Perform forward projection.

        :Parameters:
          data: ndarray
            Data array to map
          demean: boolean | None
            Override demean setting for this method call.

        :Returns:
          NumPy array
        """
        # let arg overwrite instance flag
        if demean is None:
            demean = self._demean

        if self._proj is None:
            raise RuntimeError, "Mapper needs to be train before used."

        d = N.asmatrix(data)

        # Remove input offset if present
        if demean and self._offset_in is not None:
            d = d - self._offset_in

        # Do forward projection
        res = (d * self._proj).A

        # Add output offset if present
        if demean and self._offset_out is not None:
            res += self._offset_out

        return res


    def reverse(self, data):
        """Reproject (reconstruct) data into the original feature space.

        :Returns:
          NumPy array
        """
        if self._proj is None:
            raise RuntimeError, "Mapper needs to be trained before used."
        d = N.asmatrix(data)
        # Remove offset if present in output space
        if self._demean and self._offset_out is not None:
            d = d - self._offset_out

        # Do reverse projection
        res = (d * self.recon).A

        # Add offset in input space
        if self._demean and self._offset_in is not None:
            res += self._offset_in

        return res

    def _computeRecon(self):
        """Given that a projection is present -- compute reconstruction matrix.
        By default -- pseudoinverse of projection matrix.  Might be overridden
        in derived classes for efficiency.
        """
        return N.linalg.pinv(self._proj)

    def _getRecon(self):
        """Compute (if necessary) and return reconstruction matrix
        """
        # (re)build reconstruction matrix
        recon = self._recon
        if recon is None:
            self._recon = recon = self._computeRecon()
        return recon


    def getInSize(self):
        """Returns the number of original features."""
        return self._proj.shape[0]


    def getOutSize(self):
        """Returns the number of components to project on."""
        return self._proj.shape[1]


    def selectOut(self, outIds):
        """Choose a subset of components (and remove all others)."""
        self._proj = self._proj[:, outIds]
        if self._offset_out is not None:
            self._offset_out = self._offset_out[outIds]
        # invalidate reconstruction matrix
        self._recon = None

    proj  = property(fget=lambda self: self._proj, doc="Projection matrix")
    recon = property(fget=_getRecon, doc="Backprojection matrix")



class CombinedMapper(Mapper):
    """Meta mapper that combines several embedded mappers.

    This mapper can be used the map from several input dataspaces into a common
    output dataspace. When :meth:`~mvpa.mappers.base.CombinedMapper.forward`
    is called with a sequence of data, each element in that sequence is passed
    to the corresponding mapper, which in turned forward-maps the data. The
    output of all mappers is finally stacked (horizontally or column or
    feature-wise) into a single large 2D matrix (nsamples x nfeatures).

    .. note::
      This mapper can only embbed mappers that transform data into a 2D
      (nsamples x nfeatures) representation. For mappers not supporting this
      transformation, consider wrapping them in a
      :class:`~mvpa.mappers.base.ChainMapper` with an appropriate
      post-processing mapper.

    CombinedMapper fully supports forward and backward mapping, training,
    runtime selection of a feature subset (in output dataspace) and retrieval
    of neighborhood information.
    """
    def __init__(self, mappers, **kwargs):
        """
        :Parameters:
          mappers: list of Mapper instances
            The order of the mappers in the list is important, as it will define
            the order in which data snippets have to be passed to
            :meth:`~mvpa.mappers.base.CombinedMapper.forward`.
          **kwargs
            All additional arguments are passed to the base-class constructor.
        """
        Mapper.__init__(self, **kwargs)

        if not len(mappers):
            raise ValueError, \
                  'CombinedMapper needs at least one embedded mapper.'

        self._mappers = mappers


    def forward(self, data):
        """Map data from the IN spaces into to common OUT space.

        :Parameter:
          data: sequence
            Each element in the `data` sequence is passed to the corresponding
            embedded mapper and is mapped individually by it. The number of
            elements in `data` has to match the number of embedded mappers. Each
            element is `data` has to provide the same number of samples
            (first dimension).

        :Returns:
          array: nsamples x nfeatures
            Horizontally stacked array of all embedded mapper outputs.
        """
        if not len(data) == len(self._mappers):
            raise ValueError, \
                  "CombinedMapper needs a sequence with data for each " \
                  "Mapper"

        # return a big array for the result of the forward mapped data
        # of each embedded mapper
        try:
            return N.hstack(
                    [self._mappers[i].forward(d) for i, d in enumerate(data)])
        except ValueError:
            raise ValueError, \
                  "Embedded mappers do not generate same number of samples. " \
                  "Check input data."


    def reverse(self, data):
        """Reverse map data from OUT space into the IN spaces.

        :Parameter:
          data: array
            Single data array to be reverse mapped into a sequence of data
            snippets in their individual IN spaces.

        :Returns:
          list
        """
        # assure array and transpose
        # i.e. transpose of 1D does nothing, but of 2D puts features
        # along first dimension
        data = N.asanyarray(data).T

        if not len(data) == self.getOutSize():
            raise ValueError, \
                  "Data shape does match mapper reverse mapping properties."

        result = []
        fsum = 0
        for m in self._mappers:
            # calculate upper border
            fsum_new = fsum + m.getOutSize()

            result.append(m.reverse(data[fsum:fsum_new].T))

            fsum = fsum_new

        return result


    def train(self, dataset):
        """Trains all embedded mappers.

        The provided training dataset is splitted appropriately and the
        corresponding pieces are passed to the
        :meth:`~mvpa.mappers.base.Mapper.train` method of each embedded mapper.

        :Parameter:
          dataset: :class:`~mvpa.datasets.base.Dataset` or subclass
            A dataset with the number of features matching the `outSize` of the
            `CombinedMapper`.
        """
        if dataset.nfeatures != self.getOutSize():
            raise ValueError, "Training dataset does not match the mapper " \
                              "properties."

        fsum = 0
        for m in self._mappers:
            # need to split the dataset
            fsum_new = fsum + m.getOutSize()
            m.train(dataset.selectFeatures(range(fsum, fsum_new)))
            fsum = fsum_new


    def getInSize(self):
        """Returns the size of the entity in input space"""
        return N.sum(m.getInSize() for m in self._mappers)


    def getOutSize(self):
        """Returns the size of the entity in output space"""
        return N.sum(m.getOutSize() for m in self._mappers)


    def selectOut(self, outIds):
        """Remove some elements and leave only ids in 'out'/feature space.

        .. note::
          The subset selection is done inplace

        :Parameter:
          outIds: sequence
            All output feature ids to be selected/kept.
        """
        # determine which features belong to what mapper
        # and call its selectOut() accordingly
        ids = N.asanyarray(outIds)
        fsum = 0
        for m in self._mappers:
            # bool which meta feature ids belongs to this mapper
            selector = N.logical_and(ids < fsum + m.getOutSize(), ids >= fsum)
            # make feature ids relative to this dataset
            selected = ids[selector] - fsum
            fsum += m.getOutSize()
            # finally apply to mapper
            m.selectOut(selected)


    def getNeighbor(self, outId, *args, **kwargs):
        """Get the ids of the neighbors of a single feature in output dataspace.

        :Parameters:
          outId: int
            Single id of a feature in output space, whos neighbors should be
            determined.
          *args, **kwargs
            Additional arguments are passed to the metric of the embedded
            mapper, that is responsible for the corresponding feature.

        Returns a list of outIds
        """
        fsum = 0
        for m in self._mappers:
            fsum_new = fsum + m.getOutSize()
            if outId >= fsum and outId < fsum_new:
                return m.getNeighbor(outId - fsum, *args, **kwargs)
            fsum = fsum_new

        raise ValueError, "Invalid outId passed to CombinedMapper.getNeighbor()"


    def __repr__(self):
        s = Mapper.__repr__(self).rstrip(' )')
        # beautify
        if not s[-1] == '(':
            s += ' '
        s += 'mappers=[%s])' % ', '.join([m.__repr__() for m in self._mappers])
        return s



class ChainMapper(Mapper):
    """Meta mapper that embedded a chain of other mappers.

    Each mapper in the chain is called successively to perform forward or
    reverse mapping.

    .. note::

      In its current implementation the `ChainMapper` treats all but the last
      mapper as simple pre-processing (in forward()) or post-processing (in
      reverse()) steps. All other capabilities, e.g. training and neighbor
      metrics are provided by or affect *only the last mapper in the chain*.

      With respect to neighbor metrics this means that they are determined
      based on the input space of the *last mapper* in the chain and *not* on
      the input dataspace of the `ChainMapper` as a whole
    """
    def __init__(self, mappers, **kwargs):
        """
        :Parameters:
          mappers: list of Mapper instances
          **kwargs
            All additional arguments are passed to the base-class constructor.
        """
        Mapper.__init__(self, **kwargs)

        if not len(mappers):
            raise ValueError, 'ChainMapper needs at least one embedded mapper.'

        self._mappers = mappers


    def forward(self, data):
        """Calls all mappers in the chain successively.

        :Parameter:
          data
            data to be chain-mapped.
        """
        mp = data
        for m in self._mappers:
            mp = m.forward(mp)

        return mp


    def reverse(self, data):
        """Calls all mappers in the chain successively, in reversed order.

        :Parameter:
          data: array
            data array to be reverse mapped into the orginal dataspace.
        """
        mp = data
        for m in reversed(self._mappers):
            mp = m.reverse(mp)

        return mp


    def train(self, dataset):
        """Trains the *last* mapper in the chain.

        :Parameter:
          dataset: :class:`~mvpa.datasets.base.Dataset` or subclass
            A dataset with the number of features matching the `outSize` of the
            last mapper in the chain (which is identical to the one of the
            `ChainMapper` itself).
        """
        if dataset.nfeatures != self.getOutSize():
            raise ValueError, "Training dataset does not match the mapper " \
                              "properties."

        self._mappers[-1].train(dataset)


    def getInSize(self):
        """Returns the size of the entity in input space"""
        return self._mappers[0].getInSize()


    def getOutSize(self):
        """Returns the size of the entity in output space"""
        return self._mappers[-1].getOutSize()


    def selectOut(self, outIds):
        """Remove some elements from the *last* mapper in the chain.

        :Parameter:
          outIds: sequence
            All output feature ids to be selected/kept.
        """
        self._mappers[-1].selectOut(outIds)


    def getNeighbor(self, outId, *args, **kwargs):
        """Get the ids of the neighbors of a single feature in output dataspace.

        .. note::

          The neighbors are determined based on the input space of the *last
          mapper* in the chain and *not* on the input dataspace of the
          `ChainMapper` as a whole!

        :Parameters:
          outId: int
            Single id of a feature in output space, whos neighbors should be
            determined.
          *args, **kwargs
            Additional arguments are passed to the metric of the embedded
            mapper, that is responsible for the corresponding feature.

        Returns a list of outIds
        """
        return self._mappers[-1].getNeighbor(outId, *args, **kwargs)


    def __repr__(self):
        s = Mapper.__repr__(self).rstrip(' )')
        # beautify
        if not s[-1] == '(':
            s += ' '
        s += 'mappers=[%s])' % ', '.join([m.__repr__() for m in self._mappers])
        return s