This file is indexed.

/usr/share/octave/packages/statistics-1.3.0/hmmestimate.m is in octave-statistics 1.3.0-4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
## Copyright (C) 2006, 2007 Arno Onken <asnelt@asnelt.org>
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{transprobest}, @var{outprobest}] =} hmmestimate (@var{sequence}, @var{states})
## @deftypefnx {Function File} {} hmmestimate (@dots{}, 'statenames', @var{statenames})
## @deftypefnx {Function File} {} hmmestimate (@dots{}, 'symbols', @var{symbols})
## @deftypefnx {Function File} {} hmmestimate (@dots{}, 'pseudotransitions', @var{pseudotransitions})
## @deftypefnx {Function File} {} hmmestimate (@dots{}, 'pseudoemissions', @var{pseudoemissions})
## Estimate the matrix of transition probabilities and the matrix of output
## probabilities of a given sequence of outputs and states generated by a
## hidden Markov model. The model assumes that the generation starts in
## state @code{1} at step @code{0} but does not include step @code{0} in the
## generated states and sequence.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{sequence} is a vector of a sequence of given outputs. The outputs
## must be integers ranging from @code{1} to the number of outputs of the
## hidden Markov model.
##
## @item
## @var{states} is a vector of the same length as @var{sequence} of given
## states. The states must be integers ranging from @code{1} to the number
## of states of the hidden Markov model.
## @end itemize
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{transprobest} is the matrix of the estimated transition
## probabilities of the states. @code{transprobest(i, j)} is the estimated
## probability of a transition to state @code{j} given state @code{i}.
##
## @item
## @var{outprobest} is the matrix of the estimated output probabilities.
## @code{outprobest(i, j)} is the estimated probability of generating
## output @code{j} given state @code{i}.
## @end itemize
##
## If @code{'symbols'} is specified, then @var{sequence} is expected to be a
## sequence of the elements of @var{symbols} instead of integers.
## @var{symbols} can be a cell array.
##
## If @code{'statenames'} is specified, then @var{states} is expected to be
## a sequence of the elements of @var{statenames} instead of integers.
## @var{statenames} can be a cell array.
##
## If @code{'pseudotransitions'} is specified then the integer matrix
## @var{pseudotransitions} is used as an initial number of counted
## transitions. @code{pseudotransitions(i, j)} is the initial number of
## counted transitions from state @code{i} to state @code{j}.
## @var{transprobest} will have the same size as @var{pseudotransitions}.
## Use this if you have transitions that are very unlikely to occur.
##
## If @code{'pseudoemissions'} is specified then the integer matrix
## @var{pseudoemissions} is used as an initial number of counted outputs.
## @code{pseudoemissions(i, j)} is the initial number of counted outputs
## @code{j} given state @code{i}. If @code{'pseudoemissions'} is also
## specified then the number of rows of @var{pseudoemissions} must be the
## same as the number of rows of @var{pseudotransitions}. @var{outprobest}
## will have the same size as @var{pseudoemissions}. Use this if you have
## outputs or states that are very unlikely to occur.
##
## @subheading Examples
##
## @example
## @group
## transprob = [0.8, 0.2; 0.4, 0.6];
## outprob = [0.2, 0.4, 0.4; 0.7, 0.2, 0.1];
## [sequence, states] = hmmgenerate (25, transprob, outprob);
## [transprobest, outprobest] = hmmestimate (sequence, states)
## @end group
##
## @group
## symbols = @{'A', 'B', 'C'@};
## statenames = @{'One', 'Two'@};
## [sequence, states] = hmmgenerate (25, transprob, outprob,
##                      'symbols', symbols, 'statenames', statenames);
## [transprobest, outprobest] = hmmestimate (sequence, states,
##                              'symbols', symbols,
##                              'statenames', statenames)
## @end group
##
## @group
## pseudotransitions = [8, 2; 4, 6];
## pseudoemissions = [2, 4, 4; 7, 2, 1];
## [sequence, states] = hmmgenerate (25, transprob, outprob);
## [transprobest, outprobest] = hmmestimate (sequence, states, 'pseudotransitions', pseudotransitions, 'pseudoemissions', pseudoemissions)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Lawrence R. Rabiner. A Tutorial on Hidden Markov Models and Selected
## Applications in Speech Recognition. @cite{Proceedings of the IEEE},
## 77(2), pages 257-286, February 1989.
## @end enumerate
## @end deftypefn

## Author: Arno Onken <asnelt@asnelt.org>
## Description: Estimation of a hidden Markov model for a given sequence

function [transprobest, outprobest] = hmmestimate (sequence, states, varargin)

  # Check arguments
  if (nargin < 2 || mod (length (varargin), 2) != 0)
    print_usage ();
  endif

  len = length (sequence);
  if (length (states) != len)
    error ("hmmestimate: sequence and states must have equal length");
  endif

  # Flag for symbols
  usesym = false;
  # Flag for statenames
  usesn = false;

  # Variables for return values
  transprobest = [];
  outprobest = [];

  # Process varargin
  for i = 1:2:length (varargin)
    # There must be an identifier: 'symbols', 'statenames',
    # 'pseudotransitions' or 'pseudoemissions'
    if (! ischar (varargin{i}))
      print_usage ();
    endif
    # Upper case is also fine
    lowerarg = lower (varargin{i});
    if (strcmp (lowerarg, 'symbols'))
      usesym = true;
      # Use the following argument as symbols
      symbols = varargin{i + 1};
    # The same for statenames
    elseif (strcmp (lowerarg, 'statenames'))
      usesn = true;
      # Use the following argument as statenames
      statenames = varargin{i + 1};
    elseif (strcmp (lowerarg, 'pseudotransitions'))
      # Use the following argument as an initial count for transitions
      transprobest = varargin{i + 1};
      if (! ismatrix (transprobest))
        error ("hmmestimate: pseudotransitions must be a non-empty numeric matrix");
      endif
      if (rows (transprobest) != columns (transprobest))
        error ("hmmestimate: pseudotransitions must be a square matrix");
      endif
    elseif (strcmp (lowerarg, 'pseudoemissions'))
      # Use the following argument as an initial count for outputs
      outprobest = varargin{i + 1};
      if (! ismatrix (outprobest))
        error ("hmmestimate: pseudoemissions must be a non-empty numeric matrix");
      endif
    else
      error ("hmmestimate: expected 'symbols', 'statenames', 'pseudotransitions' or 'pseudoemissions' but found '%s'", varargin{i});
    endif
  endfor

  # Transform sequence from symbols to integers if necessary
  if (usesym)
    # sequenceint is used to build the transformed sequence
    sequenceint = zeros (1, len);
    for i = 1:length (symbols)
      # Search for symbols(i) in the sequence, isequal will have 1 at
      # corresponding indices; i is the right integer for that symbol
      isequal = ismember (sequence, symbols(i));
      # We do not want to change sequenceint if the symbol appears a second
      # time in symbols
      if (any ((sequenceint == 0) & (isequal == 1)))
        isequal *= i;
        sequenceint += isequal;
      endif
    endfor
    if (! all (sequenceint))
      index = max ((sequenceint == 0) .* (1:len));
      error (["hmmestimate: sequence(" int2str (index) ") not in symbols"]);
    endif
    sequence = sequenceint;
  else
    if (! isvector (sequence))
      error ("hmmestimate: sequence must be a non-empty vector");
    endif
    if (! all (ismember (sequence, 1:max (sequence))))
      index = max ((ismember (sequence, 1:max (sequence)) == 0) .* (1:len));
      error (["hmmestimate: sequence(" int2str (index) ") not feasible"]);
    endif
  endif

  # Transform states from statenames to integers if necessary
  if (usesn)
    # statesint is used to build the transformed states
    statesint = zeros (1, len);
    for i = 1:length (statenames)
      # Search for statenames(i) in states, isequal will have 1 at
      # corresponding indices; i is the right integer for that statename
      isequal = ismember (states, statenames(i));
      # We do not want to change statesint if the statename appears a second
      # time in statenames
      if (any ((statesint == 0) & (isequal == 1)))
        isequal *= i;
        statesint += isequal;
      endif
    endfor
    if (! all (statesint))
      index = max ((statesint == 0) .* (1:len));
      error (["hmmestimate: states(" int2str (index) ") not in statenames"]);
    endif
    states = statesint;
  else
    if (! isvector (states))
      error ("hmmestimate: states must be a non-empty vector");
    endif
    if (! all (ismember (states, 1:max (states))))
      index = max ((ismember (states, 1:max (states)) == 0) .* (1:len));
      error (["hmmestimate: states(" int2str (index) ") not feasible"]);
    endif
  endif

  # Estimate the number of different states as the max of states  
  nstate = max (states);
  # Estimate the number of different outputs as the max of sequence
  noutput = max (sequence);

  # transprobest is empty if pseudotransitions is not specified
  if (isempty (transprobest))
    # outprobest is not empty if pseudoemissions is specified
    if (! isempty (outprobest))
      if (nstate > rows (outprobest))
        error ("hmmestimate: not enough rows in pseudoemissions");
      endif
      # The number of states is specified by pseudoemissions
      nstate = rows (outprobest);
    endif
    transprobest = zeros (nstate, nstate);
  else
    if (nstate > rows (transprobest))
      error ("hmmestimate: not enough rows in pseudotransitions");
    endif
    # The number of states is given by pseudotransitions
    nstate = rows (transprobest);
  endif

  # outprobest is empty if pseudoemissions is not specified
  if (isempty (outprobest))
    outprobest = zeros (nstate, noutput);
  else
    if (noutput > columns (outprobest))
      error ("hmmestimate: not enough columns in pseudoemissions");
    endif
    # Number of outputs is specified by pseudoemissions
    noutput = columns (outprobest);
    if (rows (outprobest) != nstate)
      error ("hmmestimate: pseudoemissions must have the same number of rows as pseudotransitions");
    endif
  endif

  # Assume that the model started in state 1
  cstate = 1;
  for i = 1:len
    # Count the number of transitions for each state pair
    transprobest(cstate, states(i)) ++;
    cstate = states (i);
    # Count the number of outputs for each state output pair
    outprobest(cstate, sequence(i)) ++;
  endfor

  # transprobest and outprobest contain counted numbers
  # Each row in transprobest and outprobest should contain estimated
  # probabilities
  # => scale so that the sum is 1
  # A zero row remains zero
  # - for transprobest
  s = sum (transprobest, 2);
  s(s == 0) = 1;
  transprobest = transprobest ./ (s * ones (1, nstate));
  # - for outprobest
  s = sum (outprobest, 2);
  s(s == 0) = 1;
  outprobest = outprobest ./ (s * ones (1, noutput));

endfunction

%!test
%! sequence = [1, 2, 1, 1, 1, 2, 2, 1, 2, 3, 3, 3, 3, 2, 3, 1, 1, 1, 1, 3, 3, 2, 3, 1, 3];
%! states = [1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1];
%! [transprobest, outprobest] = hmmestimate (sequence, states);
%! expectedtransprob = [0.88889, 0.11111; 0.28571, 0.71429];
%! expectedoutprob = [0.16667, 0.33333, 0.50000; 1.00000, 0.00000, 0.00000];
%! assert (transprobest, expectedtransprob, 0.001);
%! assert (outprobest, expectedoutprob, 0.001);

%!test
%! sequence = {'A', 'B', 'A', 'A', 'A', 'B', 'B', 'A', 'B', 'C', 'C', 'C', 'C', 'B', 'C', 'A', 'A', 'A', 'A', 'C', 'C', 'B', 'C', 'A', 'C'};
%! states = {'One', 'One', 'Two', 'Two', 'Two', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'Two', 'Two', 'Two', 'Two', 'One', 'One', 'One', 'One', 'One', 'One'};
%! symbols = {'A', 'B', 'C'};
%! statenames = {'One', 'Two'};
%! [transprobest, outprobest] = hmmestimate (sequence, states, 'symbols', symbols, 'statenames', statenames);
%! expectedtransprob = [0.88889, 0.11111; 0.28571, 0.71429];
%! expectedoutprob = [0.16667, 0.33333, 0.50000; 1.00000, 0.00000, 0.00000];
%! assert (transprobest, expectedtransprob, 0.001);
%! assert (outprobest, expectedoutprob, 0.001);

%!test
%! sequence = [1, 2, 1, 1, 1, 2, 2, 1, 2, 3, 3, 3, 3, 2, 3, 1, 1, 1, 1, 3, 3, 2, 3, 1, 3];
%! states = [1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1];
%! pseudotransitions = [8, 2; 4, 6];
%! pseudoemissions = [2, 4, 4; 7, 2, 1];
%! [transprobest, outprobest] = hmmestimate (sequence, states, 'pseudotransitions', pseudotransitions, 'pseudoemissions', pseudoemissions);
%! expectedtransprob = [0.85714, 0.14286; 0.35294, 0.64706];
%! expectedoutprob = [0.178571, 0.357143, 0.464286; 0.823529, 0.117647, 0.058824];
%! assert (transprobest, expectedtransprob, 0.001);
%! assert (outprobest, expectedoutprob, 0.001);