/usr/bin/encguess is in perl 5.22.1-9.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 | #!/usr/bin/perl
# Shell/Perl polyglot bootstrap (see perlrun): if this file is fed to a
# Bourne-style shell instead of perl, the shell runs the eval line as a
# complete command and exec's /usr/bin/perl on this script with the original
# arguments.  Perl parses both lines as a single statement; since
# $running_under_some_shell is never set, the eval is skipped.
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if $running_under_some_shell;
#!./perl
use 5.008001;
use strict;
use warnings;
use Encode;
use Getopt::Std;
use Carp;
use Encode::Guess;
# Ask Getopt::Std to handle --help/--version the standard way (print to
# STDOUT and exit) instead of printing to STDERR and carrying on.
$Getopt::Std::STANDARD_HELP_VERSION = 1;

# Command-line switches: -h, -u and -S are flags, -s takes a value.
my %opt;
getopts( "huSs:", \%opt );

# Extra encodings handed to guess_encoding() by do_guess().
my @suspect_list;

# -S: list every encoding name Encode knows about, then stop.
list_valid_suspects() and exit if $opt{S};

# -s: suspects are separated by either ':' or ','.  A character class is
# needed here; the pattern /:,/ would only split on the literal two
# character sequence ":,".  Empty entries (from doubled or trailing
# separators) are dropped so they are not passed on as encoding names.
@suspect_list = grep { length } split /[:,]/, $opt{s} if $opt{s};

# Show usage when asked for it, or when there are no files to examine.
HELP_MESSAGE() if $opt{h};
HELP_MESSAGE() unless @ARGV;

# Report the guessed encoding of each file named on the command line.
do_guess($_) for @ARGV;
# Slurp a file as raw bytes.
#
# Takes the path of the file to read and returns its entire content as a
# single string.  Croaks with "<filename>:<system error>" when the file
# cannot be opened.
sub read_file {
    my ($filename) = @_;

    open( my $fh, '<:raw', $filename ) or croak "$filename:$!";

    # Undefine the input record separator just for this read so that the
    # readline returns everything up to end-of-file in one go.
    my $content = do { local $/; <$fh> };

    close $fh;
    return $content;
}
# Guess the encoding of one file and report the result on STDOUT.
#
# Takes a filename, reads it with read_file() and passes the bytes to
# guess_encoding() together with the suspects in @suspect_list.  Prints
# "<filename>\t<encoding>\n", where <encoding> is the MIME name of the
# guessed encoding (or its Encode name when it has no MIME name), or
# "unknown" when guess_encoding() did not return an encoding object.
# With -u, unidentified files produce no output at all.
# Always returns 1.
sub do_guess {
    my $filename = shift;
    my $data     = read_file($filename);
    my $enc      = guess_encoding( $data, @suspect_list );

    my $label = 'unknown';
    if ( ref $enc ) {

        # mime_name() yields undef for encodings without a registered MIME
        # name; fall back to the encoding's own name instead of printing
        # an empty field and triggering an "uninitialized value" warning.
        $label = $enc->mime_name();
        $label = $enc->name() unless defined $label;
    }
    elsif ( $opt{u} ) {

        # -u: stay silent about files that could not be identified.
        return 1;
    }

    print "$filename\t$label\n";
    return 1;
}
# Print every encoding name reported by Encode->encodings(":all"), one per
# line, on STDOUT.  Takes no arguments and always returns 1, so the caller
# can chain it with "and exit".
sub list_valid_suspects {
    my @names = Encode->encodings(":all");
    print join( "\n", @names ) . "\n";
    return 1;
}
# Show the usage text by replacing this process with pod2usage run on this
# script ($0).  Does not return when the exec succeeds; dies with the system
# error when pod2usage cannot be started.
sub HELP_MESSAGE {
    exec( 'pod2usage', $0 )
      or die "pod2usage: $!";
}
__END__
=head1 NAME
encguess - guess character encodings of files
=head1 VERSION
$Id: encguess,v 0.1 2015/02/05 10:34:19 dankogai Exp $
=head1 SYNOPSIS
encguess [switches] filename...
=head2 SWITCHES
=over 2
=item -h
show this message and exit.
=item -s
specify a list of "suspect encoding types" to test,
separated by either C<:> or C<,>
=item -S
output a list of all acceptable encoding types that can be used with
the -s param
=item -u
suppress display of unidentified types
=back
=head2 EXAMPLES:
=over 2
=item *
Guess encoding of a file named C<test.txt>, using only the default
suspect types.
encguess test.txt
=item *
Guess the encoding type of a file named C<test.txt>, using the suspect
types C<euc-jp,shiftjis,7bit-jis>.
encguess -s euc-jp,shiftjis,7bit-jis test.txt
encguess -s euc-jp:shiftjis:7bit-jis test.txt
=item *
Guess the encoding type of several files, do not display results for
unidentified files.
encguess -us euc-jp,shiftjis,7bit-jis test*.txt
=back
=head1 DESCRIPTION
The encoding identification is done by checking one encoding type at a
time until all but the right type are eliminated. The set of encoding
types to try is defined by the -s parameter and defaults to ascii,
utf8 and UTF-16/32 with BOM. This can be overridden by passing one or
more encoding types via the -s parameter. If you need to pass in
multiple suspect encoding types, separate them with either C<:> or
C<,>.
=head1 SEE ALSO
L<Encode::Guess>, L<Encode::Detect>
=head1 LICENSE AND COPYRIGHT
Copyright 2015 Michael LaGrasta and Dan Kogai.
This program is free software; you can redistribute it and/or modify it
under the terms of the Artistic License (2.0). You may obtain a
copy of the full license at:
L<http://www.perlfoundation.org/artistic_license_2_0>
=cut
|