This file is indexed.

/usr/bin/tmxuniq is in libxml-tmx-perl 0.36-1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/perl -s

# PODNAME: tmxuniq
# ABSTRACT: removes duplicated translation units from TMXs

BEGIN {
  # Check at compile time that the tokenizer dependency can be loaded, so a
  # missing module produces installation advice instead of a raw compile error.
  # Block eval is used because the module name is fixed; nothing here needs to
  # be compiled from a string.
  unless (eval { require Lingua::PT::PLNbase; 1 }) {
    print STDERR
      "This script requires Lingua::PT::PLNbase to run. We suggest installing\n",
      "it using CPAN: http://www.cpan.org/modules/INSTALL.html\n";
    # Exit with a failure status: the script could not do its job, and callers
    # (shell pipelines, make rules) must be able to detect that.
    exit(1);
  }
}

use DB_File;
use Fcntl ;
use XML::TMX::Reader;
use Digest::MD5 qw(md5_hex);
use Encode;
use Lingua::PT::PLNbase;
use strict;

# Command-line switches. They are package globals because perl's -s flag (see
# the shebang line) assigns them before the script body runs.
#   -cont   open the duplicates database without truncating it
#   -id     store the running unit counter as an "id" property of each unit
#   -dig    store the MD5 digest as a "digest" property of each kept unit
#   -tok    tokenize segments before comparing them (read by n())
#   -o=FILE output file name (default: "<input>._")
#   -fast   do not append the ids of removed duplicates to the database entry
our ($cont, $id, $dig, $tok, $o, $fast);

# On-disk BTree mapping each segment digest to the ids of the units that
# produced it ("12;57;...").
my $db_file  = "__tmxuniq_$$.db";
my $db_flags = O_RDWR | O_CREAT;
$db_flags |= O_TRUNC unless $cont;

my %dic;
tie %dic, 'DB_File', $db_file, $db_flags, 0640, $DB_BTREE
  or die "Cannot open duplicates database '$db_file': $!\n";

# Every input is written to the same path when -o is given, so with several
# inputs only the last result would survive.
warn "Warning: -o used with more than one input file; "
   . "each file overwrites the output of the previous one\n"
  if $o && @ARGV > 1;

my $cid = 0;    # translation units seen so far, across all input files
my $rem = 0;    # translation units dropped as duplicates

# Prints the progress line on STDERR. $eol is appended after the line: an
# empty string keeps the cursor on the line so the next report overwrites it.
# Must only be called when $cid > 0 (it computes a percentage of $cid).
my $report = sub {
    my ($eol) = @_;
    my $size = (-s $db_file) || 0;
    printf STDERR
      "\rTotal: %10d  Removed: %8d (%.2f%%)  Database size: %10d bytes%s",
        $cid, $rem, (100*$rem/$cid), $size, $eol;
};

for my $file (@ARGV) {
    my $tm = XML::TMX::Reader->new($file);

    print STDERR "Processing...";

    $tm->for_tu
      (
       { output => $o || "$file._" },
       sub {
           my $tu = shift;
           $cid++;
           $tu->{-prop}{id} = $cid if $id;

           # The comparison key is built from the normalized segment of every
           # entry whose name does not start with "-" (presumably the language
           # codes -- confirm against XML::TMX::Reader), in sorted order so
           # that hash ordering cannot change the digest.
           my @entries = sort grep { !/^-/ } keys %$tu;
           my $key     = join("|||", map { n($tu->{$_}{-seg}) } @entries);
           my $digest  = md5_hex(encode_utf8($key));

           $report->('') unless $cid % 10000;

           if (exists $dic{$digest}) {
               # Duplicate: remember where it occurred (unless -fast) and
               # return undef, which presumably makes for_tu omit the unit
               # from the output.
               $dic{$digest} .= "$cid;" unless $fast;
               $rem++;
               return undef;
           }

           # First occurrence: record it and hand back a shallow copy of the
           # unit to be written out.
           $dic{$digest} = "$cid;";
           $tu->{-prop}{digest} = $digest if $dig;
           return {%$tu};
       }
      );

    if ($cid) {
        $report->("\n");
    } else {
        printf STDERR "\rHuh.. empty TMX?\n";
    }
    undef $tm;
}

# Release the database binding (the original untied an unrelated hash).
untie %dic;

# Normalizes a segment so that trivially different texts compare equal.
#
# Takes the segment text as its only argument and returns the normalized copy:
#   * runs of six or more dots are shortened to five dots;
#   * when the -tok switch is set, the text is passed through tokenize();
#   * every run of whitespace becomes a single space;
#   * one trailing and one leading space are removed.
sub n {
    my ($text) = @_;

    $text =~ s/\.{6,}/...../g;

    if ($tok) {
        $text = tokenize({ rs => ' ' }, $text);
    }

    # After collapsing, at most one space can remain at either end.
    $text =~ s/\s+/ /g;
    $text =~ s/ $//;
    $text =~ s/^ //;

    return $text;
}

__END__

=pod

=encoding UTF-8

=head1 NAME

tmxuniq - removes duplicated translation units from TMXs

=head1 VERSION

version 0.36

=head1 SYNOPSIS

 tmxuniq [options] tmx1 ...

=head1 DESCRIPTION

Removes duplicated translation units from a set of TMX (Translation
Memory eXchange) files.

=head1 OPTIONS

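 -id   -- insert a unique id property in each TU
 -dig  -- insert a digest property in each TU
 -tok  -- tokenize/normalize text
 -fast -- do not record the ids of removed duplicates in the database
 -cont -- do not truncate the duplicates database when opening it
 -o=out.tmx -- (with 1 argument) redefine output (default = input._)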

=head1 SEE ALSO

perl(1).

=head1 AUTHORS

=over 4

=item *

Alberto Simões <ambs@cpan.org>

=item *

José João Almeida <jj@di.uminho.pt>

=back

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2010-2017 by Projeto Natura <natura@di.uminho.pt>.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut