This file is indexed.

/usr/bin/tmx-tokenize is in libxml-tmx-perl 0.36-1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
#!/usr/bin/perl -s

# PODNAME: tmx-tokenize
# ABSTRACT: Tokenizes translation units on a tmx file.

use strict;
use warnings;

# Set by perl's -s switch (see the shebang line): "-o=out.tmx" stores
# "out.tmx" here and overrides the default output filename.
our $o;

# FreeLing3 is a soft dependency, so it is loaded at run time in order to
# report a friendly message instead of a compile-time failure. The trailing
# "1" makes the eval's own return value the success indicator.
eval { require FL3; 1 }
    or die "This XML::TMX script requires Lingua::FreeLing3 to be installed\n";

FL3->import();
use XML::TMX::Reader '0.25';
use File::Basename qw(fileparse);

# The first positional argument is the TMX file to tokenize. Test for
# definedness/length rather than truth so a file literally named "0" works.
my $file = shift;
die "You must supply the name of the file to tokenize"
    unless defined $file && length $file;

my $reader = XML::TMX::Reader->new($file);

# Default output: the input name with a "t_" prefix. The prefix goes on the
# file name only, so "dir/file.tmx" becomes "dir/t_file.tmx" rather than the
# (usually non-existent) "t_dir/file.tmx". A bare file name keeps the
# historical "t_<file>" form.
my ($base_name, $dir_part) = fileparse($file);
my $output = $base_name eq $file ? "t_$file" : $dir_part . "t_$base_name";

# An explicit -o=<name> wins over the default.
$output = $o if defined $o && length $o;

binmode STDOUT, ":utf8";

# Maps the primary language subtag of a translation-unit key (two-letter
# ISO 639-1 or three-letter ISO 639-2 form, lower-cased) to the language
# code handed to tokenizer().
my %tokenizer_lang_for = (
    pt => 'pt', por => 'pt',
    es => 'es', spa => 'es',
    it => 'it', ita => 'it',
    ru => 'ru', rus => 'ru',
    en => 'en', eng => 'en',
    gl => 'gl', glg => 'gl',
);

# Walk every translation unit, tokenize the segments of the supported
# languages and hand the unit back to the reader, which is given $output
# as its -output target.
$reader->for_tu(
    {
        -output => $output,
        -prop   => { tokenized => "true" },
        # NOTE(review): the other options carry a leading dash; confirm
        # that XML::TMX::Reader honours the undashed "verbose" spelling.
        verbose => 1,
    },
    sub {
        my $tu = shift;

        for my $lang (keys %$tu) {
            # Decide on the leading subtag only ("pt" in "pt-BR"). Matching
            # anywhere in the key would treat e.g. "ca-ES" as Spanish, and
            # keys that do not start with a letter are skipped altogether.
            next unless $lang =~ /\A([A-Za-z]+)/;
            my $ln = $tokenizer_lang_for{ lc $1 } or next;

            # Leave missing or whitespace-only segments untouched.
            my $txt = $tu->{$lang}{-seg};
            next unless defined $txt && $txt =~ /\S/;

            # tokenize() is dereferenced as an array reference of tokens,
            # which are re-joined with single spaces.
            $tu->{$lang}{-seg} =
                join(" ", @{ tokenizer($ln)->tokenize($txt, to_text => 1) });
        }

        return $tu;
    }
);

__END__

=pod

=encoding UTF-8

=head1 NAME

tmx-tokenize - Tokenizes translation units on a tmx file.

=head1 VERSION

version 0.36

=head1 SYNOPSIS

   tmx-tokenize file.tmx  # creates t_file.tmx

   tmx-tokenize -o=out.tmx file.tmx

=head1 DESCRIPTION

Although this script is bundled in C<XML::TMX>, it has a soft
dependency on C<Lingua::FreeLing3>. Soft means that the dependency is
not ensured at install time, and other features of the module can
still be used without C<Lingua::FreeLing3>. Nevertheless, if you want
to use this tool you should install that module.

At the moment the supported languages are English, Spanish, Russian,
Portuguese, Italian and Galician, all tokenized through FreeLing3.

If your TMX file includes any other languages, their segments are kept
unchanged.  This behavior may change in the future, as a basic
regexp-based tokenizer might be implemented.

=head1 SEE ALSO

XML::TMX, Lingua::FreeLing3

=head1 AUTHORS

=over 4

=item *

Alberto Simões <ambs@cpan.org>

=item *

José João Almeida <jj@di.uminho.pt>

=back

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2010-2017 by Projeto Natura <natura@di.uminho.pt>.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut