This file is indexed.

/usr/share/perl5/TM/Analysis.pm is in libtm-perl 1.56-7.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
package TM::Analysis;

use TM;
use Data::Dumper;

use Class::Trait 'base';
use Class::Trait 'TM::Graph';

=pod

=head1 NAME

TM::Analysis - Topic Maps, analysis functions

=head1 SYNOPSIS

  use TM::Materialized::AsTMa;
  my $tm = new TM::Materialized::AsTMa (file => 'test.atm');
  $tm->sync_in;

  Class::Trait->apply ($tm, 'TM::Analysis');

  print Dumper $tm->statistics;

  print Dumper $tm->orphanage;

=head1 DESCRIPTION

This package contains some topic map analysis functionality.

=head1 INTERFACE

=over

=item B<statistics>

This (currently quite limited) function returns a reference to a hash containing the
following fields:

=over

=item C<nr_toplets>

Number of toplets in the map. This includes ALL toplets: those for topics and also those for
assertions.

=item C<nr_asserts>

Nr of assertions in the map.

=item C<nr_clusters>

Nr of clusters according to the C<cluster> function elsewhere in this document.

=back

=cut

# statistics - collect simple counts about the map.
#
# Arguments: optionally a list of the counters wanted (nr_toplets,
# nr_asserts, nr_clusters); with no arguments all three are computed.
# Unknown names are silently ignored.
# Returns: a hash reference mapping each requested counter to its value.
sub statistics {
    my $self   = shift;
    my @wanted = @_ ? @_ : qw(nr_toplets nr_asserts nr_clusters);   # nothing requested => everything

    my %result;
    for my $item (@wanted) {
        if ($item eq 'nr_toplets') {
            # relies on toplets yielding a count when called in scalar context
            $result{$item} = scalar $self->toplets;
        }
        elsif ($item eq 'nr_asserts') {
            # same scalar-context convention for match_forall
            $result{$item} = scalar $self->match_forall;
        }
        elsif ($item eq 'nr_clusters') {
            # the graph trait supplies clusters(); apply it before calling
            Class::Trait->apply($self, 'TM::Graph');
            my $clusters = $self->clusters(use_roles => 1, use_type => 1);
            $result{$item} = scalar @$clusters;
        }
    }

    # TODO: size of map
    # TODO: payload (basenames, occurrence data, variant

    return \%result;
}


# NOTE: intentionally no executable statements here. A file-level `return`
# at this point would end the module's run-time phase as soon as it is
# loaded, so the `our $VERSION = ...` assignment and the closing `1;` near
# the end of the file would never be executed (and running the file
# directly would die with "Can't return outside a subroutine").


=pod

=item B<orphanage>

This computes the topics which lack a type, an instance, a superclass or a subclass. Without
further parameters, it returns a hash reference with the following fields:

=over

=item C<untyped>

Holds a list reference to all topic ids which have no type.

=item C<empty>

Holds a list reference to all topic ids which have no instance.

=item C<unclassified>

Holds a list reference to all topic ids which have no superclass.

=item C<unspecified>

Holds a list reference to all topic ids which have no subclass.

=back

Optionally, a list of the identifiers above can be passed in so that only that particular
information is actually returned (some speedup):

   my $o = TM::Analysis::orphanage ($tm, 'untyped');

=cut

# orphanage - find topics that lack a type, an instance, a superclass or a
# subclass.
#
# Arguments: optionally a list of the categories wanted (untyped, empty,
# unclassified, unspecified); with no arguments all four are computed.
# Unknown category names are silently ignored.
# Returns: a hash reference mapping each requested category to a list
# reference of topic ids.
sub orphanage {
    my $self   = shift;
    my @wanted = @_ ? @_ : qw(untyped empty unclassified unspecified);   # nothing requested => everything

    # Per-topic tallies, keyed by topic id.
    my (%type_count, %instance_count, %super_count, %sub_count);

    for my $assertion (values %{ $self->{assertions} }) {
        my $kind = $assertion->[TM->TYPE];

        # every assertion counts as typed, and its type gains an instance
        $type_count{ $assertion->[TM->LID] }++;
        $instance_count{$kind}++;

        if ($kind eq 'isa') {
            # assumes the players are stored in (class, instance) order -- TODO confirm
            my ($class, $instance) = @{ $assertion->[TM->PLAYERS] };
            $type_count{$instance}++;
            $instance_count{$class}++;
        }
        elsif ($kind eq 'is-subclass-of') {
            # assumes the players are stored in (subclass, superclass) order -- TODO confirm
            my ($sub, $super) = @{ $assertion->[TM->PLAYERS] };
            $super_count{$sub}++;
            $sub_count{$super}++;
        }
    }

    # Which tally decides membership in which category.
    my %tally_for = (
        untyped      => \%type_count,
        empty        => \%instance_count,
        unclassified => \%super_count,
        unspecified  => \%sub_count,
    );

    my @ids = map { $_->[TM->LID] } $self->toplets;

    my %orphans;
    for my $category (@wanted) {
        my $tally = $tally_for{$category} or next;       # not one of ours: skip
        $orphans{$category} = [ grep { !$tally->{$_} } @ids ];
    }
    return \%orphans;
}

=pod

=item B<entropy>

This method returns a hash (reference) where the keys are the assertion types and the values are the
individual entropies of these assertion types. More frequently used (inflationary) types will have a
lower value, and so will very seldom used ones; the types in between score highest.

=cut

# entropy - per-assertion-type entropy contribution.
#
# Returns a hash reference keyed by assertion type; each value is
# -p * log(p), where p is the share of all assertions having that type
# (log is Perl's natural logarithm). A map without assertions yields an
# empty hash.
sub entropy {
    my $self = shift;

    # Tally how often each assertion type occurs and how many assertions exist.
    my %count_of;
    my $total;
    for my $assertion (values %{ $self->{assertions} }) {
        $count_of{ $assertion->[TM->TYPE] }++;
        $total++;
    }

    # Turn each tally into a probability and then into its entropy term.
    my %entropy_of;
    for my $type (keys %count_of) {
        my $p = $count_of{$type} / $total;
        $entropy_of{$type} = - $p * log($p);
    }
    return \%entropy_of;
}

=pod

=back

=head1 SEE ALSO

L<TM>

=head1 COPYRIGHT AND LICENSE

Copyright 20(0[3-68]|10) by Robert Barta, E<lt>drrho@cpan.orgE<gt>

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut

our $VERSION  = '0.91';

1;

__END__


 item B<info>

I<$hashref> = I<$tm>->info (I<list of info items>)

returns some meta/statistical information about the map in form of
a hash reference containing one or more of the following components (you might
want to discover the return values with Data::Dumper):

 over

 item (a)

I<informational>: this hash reference contains the number of topics, the number of associations,
the UNIX date of the last modification and synchronisation with the external tied object and
a list reference to other topic maps on which this particular map depends.

 item (b)

I<warnings>

This hash reference contains a list (reference) of topic ids of topics I<not_used> anywhere in the map.
There is also a list (I<no_baseName>) of topics which do not contain any baseName (yes this is allowed in section
3.6.1 of the standard).

 item (c)

I<errors>

This component contains a list reference I<undefined_topics> containing a list of topic identifiers
of topics not defined in the map. 

 item (d)

I<statistics>

This component contains a hash reference to various statistics information, as the number of clusters,
maximum and minimum size of clusters, number of topics defined and topics mentioned.


TODOs:

 over

 item

detect cyclic dependency of topic types

 back

 back

Via the parameters you can control which information you are interested in:

Example:

   $my_info = $tm->info ('informational', 'warnings', 'errors', 'statistics');


 cut

# info - legacy reporting routine, kept for reference only.
#
# This sub sits below the __END__ marker of the file, so perl never compiles
# it.
#
# Takes a list of directive names and returns a hash reference with one entry
# per recognised directive: 'informational', 'warnings', 'errors' and
# 'statistics'. Any other directive is ignored; if no directive matches,
# $info is never assigned and undef is returned.
#
# NOTE(review): the accessors used here (topics(), associations(), topic(),
# association(), is_topic(), $XTM::PSI::xtm{...}) do not appear in the live
# part of this module -- presumably left over from an older API; confirm
# before reviving this code.
sub info {
  my $self  = shift;
  my @what  = @_;

  my $info;
  my $usage;

  foreach my $w (@what) {
    if ($w eq 'informational') {
      $info->{$w} = { #address     => $self,
		      nr_topics   => scalar @{$self->topics},
		      nr_assocs   => scalar @{$self->associations},
		      last_mod    => $self->{last_mod},
		      last_syncin => $self->{last_syncin},
		      depends     => [ map { $_->{memory}->{id} } @{$self->{depends}} ],
		      tieref      => ref ($self->{tie}),
		      id          => $self->{memory} ? $self->{memory}->{id} : undef
		    };
    } elsif ($w eq 'warnings') {
      # figure out those topics which do not seem to have a single baseName
      $info->{$w}->{'no_baseName'} = [];
      foreach my $tid (@{$self->topics()}) {
	push @{$info->{$w}->{'no_baseName'}}, $tid unless $self->topic($tid)->baseNames && @{$self->topic($tid)->baseNames};
      }
      # the usage table is computed at most once per info() call and shared by the branches
      $usage = $self->_usage() unless $usage;

# _usage - helper whose definition is written textually inside the 'warnings'
# branch of info().
#
# Walks all topics and associations and returns a hash reference with the
# sub-hashes as_instanceOf, as_instance, as_scope, as_role and as_member, each
# counting occurrences per topic id. Apart from as_instance (keyed by the
# topic's own id), only hrefs of the form "#id" are counted.
sub _usage {
  my $self = shift;

  my $usage;
  # figure out which topics are used as topicRef (scope, member, role, instanceOf)
  foreach my $tid (@{$self->topics()}) {
    # instanceOfs
    foreach my $i (@{$self->topic($tid)->instanceOfs}) {
      $usage->{as_instanceOf}->{$1}++ if $i->reference->href =~ /^\#(.+)/;
      $usage->{as_instance}->{$tid}++ unless $i->reference->href eq $XTM::PSI::xtm{topic};
    }
    # scopes
    foreach my $b (@{$self->topic($tid)->baseNames}) { 
      foreach my $s (@{$b->scope->references}) {
	if ($s->href =~ /^\#(.+)/) {
	  $usage->{as_scope}->{$1}++;
	}
      }
    }
    # occurrences: both their type (instanceOf) and their scope references count
    foreach my $o (@{$self->topic($tid)->occurrences}) { 
	if ($o->instanceOf->reference->href =~ /^\#(.+)/) {
            $usage->{as_instanceOf}->{$1}++;
	}
        foreach my $r (@{$o->scope->references}) {
	    if ($r->href =~ /^\#(.+)/) {
                $usage->{as_scope}->{$1}++;
	    }
	}
    }
  }
  foreach my $aid (@{$self->associations()}) {
    # instanceOfs
    if (my $i = $self->association($aid)->instanceOf) {
      if ($i->reference->href =~ /^\#(.+)/) {
	$usage->{as_instanceOf}->{$1}++;
      }
    }
    foreach my $m (@{$self->association($aid)->members}) {
      # roles
      if ($m->roleSpec) {
	$usage->{as_role}->{$1}++ if ($m->roleSpec->reference->href =~ /^\#(.+)/);
      }
      # members
      foreach my $r (@{$m->references}) {
	$usage->{as_member}->{$1}++ if ($r->href =~ /^\#(.+)/);
      }
    }
  }
  return $usage;
}
      use Data::Dumper;
##      print STDERR Dumper \%as_instanceOf, \%as_scope, \%as_member, \%as_role;
##print Dumper $usage;

      # a topic is "not used" when it shows up in none of the usage tables
      $info->{$w}->{'not_used'} = [ 
         grep (! ( $usage->{as_instanceOf}->{$_} || 
		   $usage->{as_instance}->{$_}   || 
		   $usage->{as_scope}->{$_}      || 
		   $usage->{as_member}->{$_}     ||
		   $usage->{as_role}->{$_}), @{$self->topics()}) 
				  ];
    } elsif ($w eq 'errors') {
      $usage = $self->_usage() unless $usage;
      # every id referenced somewhere for which is_topic() is false
      # (an id found in several usage tables is listed once per table)
      $info->{$w}->{'undefined_topics'} = [
         grep (!$self->is_topic($_), (keys %{$usage->{as_instanceOf}},
				      keys %{$usage->{as_instance}},
				      keys %{$usage->{as_scope}},
				      keys %{$usage->{as_member}},
				      keys %{$usage->{as_role}})
	      )
					    ];
    } elsif ($w eq 'statistics') {
      $usage       = $self->_usage() unless $usage;
#use Data::Dumper;
#print STDERR Dumper ($usage);
      # here clusters() is used as a hash reference: cluster key => list of members
      my $clusters = $self->clusters();
      # running total, smallest and largest cluster size ($min stays undef for an empty map)
      my ($tot, $min, $max) = (0, undef, 0);
      foreach my $c (keys %$clusters) {
	  $tot += scalar @{$clusters->{$c}};
	  $min = $min ? ($min > scalar @{$clusters->{$c}} ? scalar @{$clusters->{$c}} : $min) : scalar @{$clusters->{$c}};
	  $max =         $max < scalar @{$clusters->{$c}} ? scalar @{$clusters->{$c}} : $max;
      }

      $info->{$w} = {
		     nr_topics_defined   => scalar @{$self->topics},
		     nr_assocs           => scalar @{$self->associations},
		     nr_clusters         => scalar keys %$clusters,
		     mean_topics_per_cluster => %$clusters ? 1.0 * $tot / scalar keys %$clusters : 1, # empty map => 1 cluster (do not argue with me here)
		     max_topics_per_cluster  => $max,
		     min_topics_per_cluster  => $min,
		     nr_topics_mentioned     => $tot,
		     };
    }; # ignore other directives
  }
  return $info;
}

 =pod