This file is indexed.

/usr/share/tcltk/tcllib1.19/math/pca.tcl is in tcllib 1.19-dfsg-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
# pca.tcl --
#     Package for principal component analysis
#
package require Tcl 8.6
package require math::linearalgebra

namespace eval ::math::PCA {
    namespace export createPCA
}

# createPCA --
#     Create a PCA object. Based on the observations the principal
#     components are determined.
#
# Arguments:
#     data           List of observations to be analysed
#     args           Option-value pairs:
#                    -covariance 1/0 - use covariances instead of correlations
#
# Returns:
#     New object holding all information that is needed
#
proc ::math::PCA::createPCA {data args} {

    return [pcaClass new $data $args]
}

# pcaClass --
#     Class holding the variables and methods for PCA
#
::oo::class create ::math::PCA::pcaClass {
     variable eigenValues      {}
     variable eigenVectors     {}
     variable retainedValues   {}
     variable retainedVectors  {}
     variable numberComponents 0
     variable numberUsed       0
     variable numberData       0
     variable mean             {}
     variable scale            {}
     variable originalData     {}

     constructor {data options} {
         variable numberComponents
         variable numberUsed

         set originalData     $data
         set numberComponents [llength [lindex $data 0]]
         set numberUsed       $numberComponents
         set numberData       [llength $data]

         if { $numberComponents < 2 } {
             return -code error "The data should contain at least two components"
         }

         set correlation 1

         foreach {option value} $options {
             switch -- $option {
                 "" {
                     # Use default
                 }
                 "-covariance" {
                     set correlation [expr {$value? 0 : 1}]
                 }
                 default {
                     return -code error "Unknown option: $option"
                 }
             }
         }

         lassign [::math::PCA::Transform $data $correlation] observations mean scale

         #
         # Determine the singular value decomposition
         # Square and scale the singular values to get the proper eigenvalues
         #
         set usv          [::math::linearalgebra::determineSVD $observations]
         set eigenVectors [lindex $usv 2]
         set singular     [lindex $usv 1]

         set factor       [expr {1.0 / ([llength $data] - 1)}]
         set eigenValues  {}
         foreach c $singular {
             lappend eigenValues [expr {$c**2 * $factor}]
         }

         #
         # By default we use all principal components
         #
         set retainedVectors $eigenVectors
         set retainedValues  $eigenValues
     }

     #
     # Get the eigenvectors - either the ones to be used or all vectors
     #
     method eigenvectors {{option {}}} {
         variable eigenVectors
         variable retainedVectors

         if { $option eq "-all" } {
             return $eigenVectors
         } else {
             return $retainedVectors
         }
     }

     #
     # Get the eigenvalues - either the ones to be used or all values
     #
     method eigenvalues {{option {}}} {
         variable eigenValues
         variable retainedValues

         if { $option eq "-all" } {
             return $eigenValues
         } else {
             return $retainedValues
         }
     }

     #
     # Approximate an observation vector using the selected components
     #
     method approximate {observation} {
         variable retainedVectors
         variable mean
         variable scale

         set z      [::math::PCA::Normalise $observation $mean $scale]
         set t      [::math::linearalgebra::matmul $z $retainedVectors]
         set zhat   [::math::linearalgebra::matmul $t [::math::linearalgebra::transpose $retainedVectors]]
         set obshat [::math::PCA::Denormalise $zhat $mean $scale]

         return $obshat
     }

     #
     # Approximate the original data - convenience method
     #
     method approximateOriginal {} {
         variable originalData

         set approximation {}

         foreach observation $originalData {
             lappend approximation [my approximate $observation]
         }

         return $approximation
     }

     #
     # Return the scores
     #
     method scores {observation} {
         variable retainedVectors
         variable mean
         variable scale

         set z [::math::PCA::Normalise $observation $mean $scale]
         return [::math::linearalgebra::matmul $z $retainedVectors]
     }

     #
     # Return the distance
     #
     method distance {observation} {
         variable retainedVectors
         variable mean
         variable scale

         set z          [normalise $observation $mean $scale]
         set t          [::math::linearalgebra::matmul $z $retainedVectors]
         set zhat       [::math::linearalgebra::matmul $t [::math::linearalgebra::transpose $retainedVectors]]
         set difference [::math::linearalgebra::sub $z $zhat]
         return [::math::linearalgebra::norm [::math::PCA::Denormalise $difference $mean $scale]]
     }

     #
     # Return the Q statistic
     #
     method qstatistic {observation {option {}}} {
         variable mean
         variable scale

         set z          [::math::PCA::Normalise $observation $mean $scale]
         set t          [::math::linearalgebra::matmul $z $retainedVectors]
         set zhat       [::math::linearalgebra::matmul $t [::math::linearalgebra::transpose $retainedVectors]]
         set difference [::math::linearalgebra::sub $z $zhat]

         set qstat      [::math::linearalgebra::dotproduct $difference $difference]
         if { $option eq "" } {
             return $qstat
         } elseif { $option eq "-original" } {
             return [expr {$qstat * double($numberData) / double($numberData - $numberUsed - 1)}]
         } else {
             return -code error "Unknown option: $option - should be \"-original\""
         }
     }

     #
     # Get the proportions - the amount of variation explained by the components
     #
     method proportions {} {
         variable retainedValues

         set unscaledProportions {}
         foreach e $retainedValues {
             lappend unscaledProportions [expr {$e**2}]
         }

         set scale [lindex $unscaledProportions end]

         foreach p $unscaledProportions {
             lappend proportions [expr {$p / $scale}]
         }
         return $proportions
     }

     #
     # Set/get number of components to be used
     #
     method using {args} {
         variable numberComponents
         variable numberUsed
         variable eigenVectors
         variable retainedVectors

         if { [llength $args] == 0 } {

             return $numberUsed

         } elseif { [llength $args] == 1 } {

             set numberUsed [lindex $args 0]
             if { ![string is integer $numberUsed] || $numberUsed < 1 || $numberUsed > $numberComponents } {
                 return -code error "Number of components to be used must be between 1 and $numberComponents"
             }

         } elseif { [llength $args] == 2 } {

             if { [lindex $args 0] == "-minproportion" } {
                 set minimum [lindex $args 1]
                 if { [string is double $minimum] || $minimum <= 0.0 || $minimum > 1.0 } {
                     return -code error "Wrong arguments: the minimum proportion must be a number between 0 and 1 - it is \"$minimum\""
                 }

                 set sum    0.0
                 set number 0
                 foreach proportion [my proportions] {
                     set  sum    [expr {$sum + $proportion}]
                     incr number

                     if { $sum >= $minimum } {
                         break
                     }
                 }
             }
             if { $number == 0 } {
                 set number 1
             }
             set numberUsed $number

         } else {
             return -code error "Wrong arguments: use either the number of components or the minimal proportion"
         }

         if { $numberUsed < $numberComponents } {
             set retainedValues  [lrange $eigenValues 0 [expr {$numberUsed-1}]]
             set retainedVectors {}
             foreach row $eigenVectors {
                 lappend retainedVectors [lrange $row 0 [expr {$numberUsed-1}]]
             }
         } else {
             set retainedValues  $eigenValues
             set retainedVectors $eigenVectors
         }

         return $numberUsed
     }
}

# Normalise --
#     Normalise a vector, given mean and standard deviation
#
# Arguments:
#     observation       Observation vector to be normalised
#     mean              Mean value to be subtracted
#     scale             Scale factor for dividing the values by
#
proc ::math::PCA::Normalise {observation mean scale} {
    set result {}

    foreach o $observation m $mean s $scale {
        lappend result [expr {($o - $m) / $s}]
    }

    return $result
}

# Denormalise --
#     Denormalise a vector, given mean and standard deviation
#
# Arguments:
#     observation       Normalised observation vector
#     mean              Mean value to be added
#     scale             Scale factor for multiplying the values by
#
proc ::math::PCA::Denormalise {observation mean scale} {
    set result {}

    foreach o $observation m $mean s $scale {
        lappend result [expr {$o * $s + $m}]
    }

    return $result
}

# Transform
#     Transform the given observations and return the transformation parameters
#
# Arguments:
#     observations      List of observation vectors
#     correlation       Use correlation (1) or not
#
proc ::math::PCA::Transform {observations correlation} {
    set columns [llength [lindex $observations 0]]
    set number  [llength $observations]
    set mean    [lrepeat $columns [expr {0.0}]]
    set scale   [lrepeat $columns [expr {0.0}]]

    foreach observation $observations {
        set newMean  {}
        set newScale {}
        foreach o $observation m $mean s $scale {
            lappend newMean  [expr {$m + $o}]
            lappend newScale [expr {$s + $o**2}]
        }
        set mean  $newMean
        set scale $newScale
    }

    set mean  {}
    set scale {}

    foreach m $newMean s $newScale {
        lappend mean  [expr {$m / $number}]
        if { $correlation } {
            set sum [expr {($s - $m**2/$number)/($number-1)}]
            lappend scale [expr {$sum >= 0.0 ? sqrt($sum) : 0.0}]
        } else {
            lappend scale 1.0
        }
    }

    set result {}
    foreach observation $observations {
         lappend result [Normalise $observation $mean $scale]
    }
    return [list $result $mean $scale]
}

package provide math::PCA 1.0

# Test
if {0} {
set data {
    {7 4 3}
    {4 1 8}
    {6 3 5}
    {8 6 1}
    {8 5 7}
    {7 2 9}
    {5 3 3}
    {9 5 8}
    {7 4 5}
    {8 2 2}
}

set pca [::math::PCA::createPCA $data]

puts [$pca using]
puts [$pca using 2]

puts [::math::PCA::Transform $data 1]
puts [::math::PCA::Normalise {1.0 2.0 3.0} {0.0 1.0 2.0} {2.0 2.0 2.0}]
puts [::math::PCA::Denormalise {0.5 0.5 0.5} {0.0 1.0 2.0} {2.0 2.0 2.0}]

puts "Eigenvalues:  [$pca eigenvalues]"
puts "Eigenvectors: [::math::linearalgebra::show [$pca eigenvectors]]"
#puts [$pca proportions] -- check the definition!
$pca using 2
puts "Observation:   [lindex $data 0]"
puts "Approximation: [$pca approximate [lindex $data 0]]"
puts "Scores:        [$pca scores [lindex $data 0]]"
puts "Q-statistic:   [$pca qstatistic [lindex $data 0]]"
puts "(corrected)    [$pca qstatistic [lindex $data 0] -original]"

#puts [::math::PCA::createPCA $data -x 1]
}