/usr/share/tcltk/tcllib1.14/math/kruskal.tcl is in tcllib 1.14-dfsg-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 | # kruskal.tcl --
# Procedures related to ranking and the Kruskal-Wallis test
#
# test-Kruskal-Wallis --
# Perform a one-way analysis of variance according
# to Kruskal-Wallis
#
# Arguments:
# confidence Confidence level (between 0 and 1)
# args Two or more lists of data
#
# Result:
# 0 if the medians of the groups differ, 1 if they
# are the same (accept the null hypothesis)
#
proc ::math::statistics::test-Kruskal-Wallis {confidence args} {
foreach {H p} [eval analyse-Kruskal-Wallis $args] {break}
expr {$p < 1.0 - $confidence}
}
# analyse-Kruskal-Wallis --
# Perform a one-way analysis of variance according
# to Kruskal-Wallis and return the details
#
# Arguments:
# args Two or more lists of data
#
# Result:
# Kruskal-Wallis statistic H and the probability p
# that this value occurs if the
#
proc ::math::statistics::analyse-Kruskal-Wallis {args} {
set setCount [llength $args]
#
# Rank the data with respect to the whole set
#
set rankList [eval group-rank $args]
set length [llength $rankList]
#
# Re-establish original sets of values, but using the ranks
#
foreach item $rankList {
lappend rankValues([lindex $item 0]) [lindex $item 2]
}
#
# Now compute H
#
set H 0
for {set i 0} {$i < $setCount} {incr i} {
set total [expr [join $rankValues($i) +]]
set count [llength $rankValues($i)]
set H [expr {$H + pow($total,2)/double($count)}]
}
set H [expr {$H*(12.0/($length*($length + 1))) - (3*($length + 1))}]
incr setCount -1
set p [expr {1 - [::math::statistics::cdf-chisquare $setCount $H]}]
return [list $H $p]
}
# group-rank --
# Rank groups of data with respect to the whole set
#
# Arguments:
# args Two or more lists of data
#
# Result:
# List of ranking data: for each data item, the group-ID,
# the value and the rank (may be a fraction, in case of ties)
#
proc ::math::statistics::group-rank {args} {
set index 0
set rankList [list]
set setCount [llength $args]
#
# Read lists of values
#
foreach item $args {
set values($index) [lindex $args $index]
#
# Prepare ranking with rank=0
#
foreach value $values($index) {
lappend rankList [list $index $value 0]
}
incr index 1
}
#
# Sort the values
#
set rankList [lsort -real -index 1 $rankList]
#
# Assign the ranks (disregarding ties)
#
set length [llength $rankList]
for {set i 0} {$i < $length} {incr i} {
lset rankList $i 2 [expr {$i + 1}]
}
#
# Value of the previous list element
#
set prevValue {}
#
# List of indices of list elements having the same value (ties)
#
set equalIndex [list]
#
# Test for ties and re-assign mean ranks for tied values
#
for {set i 0} {$i < $length} {incr i} {
set value [lindex $rankList $i 1]
if {($value != $prevValue) && ($i > 0) && ([llength $equalIndex] > 0)} {
#
# We are still missing the first tied value
#
set j [lindex $equalIndex 0]
incr j -1
set equalIndex [linsert $equalIndex 0 $j]
#
# Re-assign rank as mean rank of tied values
#
set firstRank [lindex $rankList [lindex $equalIndex 0] 2]
set lastRank [lindex $rankList [lindex $equalIndex end] 2]
set newRank [expr {($firstRank+$lastRank)/2.0}]
foreach j $equalIndex {
lset rankList $j 2 $newRank
}
#
# Clear list of equal elements
#
set equalIndex [list]
} elseif {$value == $prevValue} {
#
# Remember index of equal value element
#
lappend equalIndex $i
}
set prevValue $value
}
return $rankList
}
|