# /usr/lib/perl5/Encode/Alias.pm is in libencode-perl 2.57-1.
# This file is owned by root:root, with mode 0o644.
# The actual contents of the file can be viewed below.
package Encode::Alias;
use strict;
use warnings;
no warnings 'redefine';
our $VERSION = do { my @r = ( q$Revision: 2.18 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };

# True when the PERL_ENCODE_DEBUG environment variable holds a true value;
# the subs below then trace their work through warn().
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
use Exporter 'import';

# Public, encouraged API is exported by default
our @EXPORT =
  qw (
  define_alias
  find_alias
);

# Ordered matching list: a flat sequence of ( alias => target ) pairs.
# define_alias() unshifts new pairs, so the newest definition is tried first.
our @Alias;

# Cached known aliases: maps a looked-up name to the encoding that
# find_alias() resolved for it, or to undef when nothing was found.
our %Alias;
# find_alias( $class, $find )
#
# Resolve the name $find through the alias rules in @Alias and return the
# matching encoding object, or undef when no rule leads to a known encoding.
# The first argument is the invocant and is only used in the debug trace.
#
# Results -- misses included -- are memoised in %Alias, so the rule list is
# walked only once per distinct spelling until define_alias() or
# undef_aliases() drops the cached entry.
#
# Rules are tried in list order.  Each rule is an ( alias => target ) pair:
#   * qr// alias  - when $find matches, a reference target is used as is and
#                   any other target is string-eval'ed so that $1 etc. from
#                   the match can be interpolated;
#   * code alias  - called with $find; its return value is the candidate;
#   * other alias - compared with $find case-insensitively.
# A candidate that is already a reference is taken as the encoding itself;
# anything else is handed to Encode::find_encoding().
sub find_alias {
    require Encode;
    my $class = shift;
    my $find  = shift;
    unless ( exists $Alias{$find} ) {
        $Alias{$find} = undef;    # Recursion guard
        for ( my $i = 0 ; $i < @Alias ; $i += 2 ) {
            my $alias = $Alias[$i];
            my $val   = $Alias[ $i + 1 ];
            my $new;
            if ( ref($alias) eq 'Regexp' && $find =~ $alias ) {
                if ( ref($val) ) {

                    # An encoding object needs no interpolation; eval'ing
                    # its stringified form would only ever produce undef.
                    $new = $val;
                }
                else {
                    DEBUG and warn "eval $val";
                    $new = eval $val;
                    DEBUG and $@ and warn "$val, $@";
                }
            }
            elsif ( ref($alias) eq 'CODE' ) {
                DEBUG and warn "$alias", "->", "($find)";
                $new = $alias->($find);
            }
            elsif ( lc($find) eq lc($alias) ) {
                $new = $val;
            }
            if ( defined($new) ) {
                next if $new eq $find;    # avoid (direct) recursion on bugs
                DEBUG and warn "$alias, $new";
                my $enc =
                  ( ref($new) ) ? $new : Encode::find_encoding($new);
                if ($enc) {
                    $Alias{$find} = $enc;
                    last;
                }
            }
        }

        # case insensitive search when canonical is not in all lowercase
        # RT ticket #7835
        unless ( $Alias{$find} ) {
            my $lcfind = lc($find);
            for my $name ( keys %Encode::Encoding, keys %Encode::ExtModule )
            {
                $lcfind eq lc($name) or next;
                $Alias{$find} = Encode::find_encoding($name);
                DEBUG and warn "$find => $name";

                # Stop at the first hit so that a later candidate which
                # fails to resolve cannot overwrite it with undef.
                last if $Alias{$find};
            }
        }
    }
    if (DEBUG) {
        my $enc  = $Alias{$find};
        my $name = $enc ? $enc->name : "";
        warn "find_alias($class, $find)->name = $name";
    }
    return $Alias{$find};
}
# define_alias( ALIAS => TARGET, ... )
#
# Register one or more alias rules.  Arguments are consumed in pairs; every
# pair is put at the front of @Alias, so the most recently defined rule is
# consulted first by find_alias().
#
# After registering a rule, every %Alias cache entry the rule could apply to
# is dropped so that the new definition takes effect on the next lookup:
#   * qr// alias  - cached names matching the pattern;
#   * code alias  - cached names for which the code returns a true value;
#   * plain alias - cached names equal to the alias ignoring case, because
#                   find_alias() compares plain aliases case-insensitively.
# A reference of any other kind flushes nothing.
sub define_alias {
    while (@_) {
        my ( $alias, $name ) = splice( @_, 0, 2 );
        unshift( @Alias, $alias => $name );    # newer one has precedence

        # clear %Alias cache to allow overrides
        my $kind    = ref($alias);
        my $lcalias = $kind ? undef : lc($alias);
        for my $k ( keys %Alias ) {
            my $stale =
                $kind eq 'Regexp' ? $k =~ $alias
              : $kind eq 'CODE'   ? $alias->($k)
              : $kind             ? 0
              :                     lc($k) eq $lcalias;
            next unless $stale;
            DEBUG and warn "delete \$Alias\{$k\}";
            delete $Alias{$k};
        }
    }
}
# Lookup table for the "latin-N" family of names: the index is the N of
# latinN and the element is the part number used to build "iso-8859-<part>".
# index:           0 .. 4  5  6   7 .. 10
our @Latin2iso = ( 0 .. 4, 9, 10, 13 .. 16 );

# Lookup table for the "win<script>" family of names: the key is the
# lower-cased script name and the value is the number used to build
# "cp<number>".
our %Winlatin2cp = (
    latin2     => 1250,
    cyrillic   => 1251,
    latin1     => 1252,
    greek      => 1253,
    turkish    => 1254,
    hebrew     => 1255,
    arabic     => 1256,
    baltic     => 1257,
    vietnamese => 1258,
);

# Install the predefined aliases as soon as the module is loaded.
init_aliases();
# undef_aliases()
#
# Forget every alias rule and every cached lookup result.  Arguments (such
# as the class name when called as a method) are ignored.
sub undef_aliases {
    splice @Alias;    # empty the rule list
    %Alias = ();      # empty the lookup cache
}
# init_aliases()
#
# Restore the factory settings: wipe all rules and cached lookups with
# undef_aliases(), then register the predefined aliases below.
#
# define_alias() puts each new rule in front of the older ones, so the rules
# registered LAST here are tried FIRST by find_alias(); the catch-all
# lower-casing rule registered first is therefore the final fallback.
# The targets of the regex rules are Perl source strings that find_alias()
# evaluates after a successful match, which is why they carry their own
# inner quotes and may refer to $1, $2, ...
sub init_aliases {
    require Encode;
    undef_aliases();

    # Try the all-lower-case version should all else fail
    define_alias( qr/^(.*)$/ => '"\L$1"' );

    # UTF/UCS stuff
    define_alias( qr/^(unicode-1-1-)?UTF-?7$/i => '"UTF-7"' );
    define_alias( qr/^UCS-?2-?LE$/i            => '"UCS-2LE"' );
    define_alias(
        qr/^UCS-?2-?(BE)?$/i => '"UCS-2BE"',

        # The empty alternative leaves $1 defined (as "") when no byte
        # order is given, so the interpolation below does not trigger an
        # "uninitialized value" warning for plain "UCS-4".
        qr/^UCS-?4-?(BE|LE|)?$/i => 'uc("UTF-32$1")',
        qr/^iso-10646-1$/i       => '"UCS-2BE"'
    );
    define_alias(
        qr/^UTF-?(16|32)-?BE$/i => '"UTF-$1BE"',
        qr/^UTF-?(16|32)-?LE$/i => '"UTF-$1LE"',
        qr/^UTF-?(16|32)$/i     => '"UTF-$1"',
    );

    # ASCII
    define_alias( qr/^(?:US-?)ascii$/i                 => '"ascii"' );
    define_alias( 'C'                                  => 'ascii' );
    define_alias( qr/\b(?:ISO[-_]?)?646(?:[-_]?US)?$/i => '"ascii"' );

    # Allow variants of iso-8859-1 etc.
    define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );

    # At least HP-UX has these.
    define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' );

    # More HP stuff.
    define_alias(
        qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i =>
          '"${1}8"' );

    # The Official name of ASCII.
    define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );

    # This is a font issue, not an encoding issue.
    # (The currency symbol of the Latin 1 upper half
    #  has been redefined as the euro symbol.)
    define_alias( qr/^(.+)\@euro$/i => '"$1"' );

    # latinN => iso-8859-<part>, looked up in @Latin2iso; numbers outside
    # the table evaluate to undef, i.e. "no alias".
    define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i =>
'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef'
    );

    # win<script> => cp<number>, looked up in %Winlatin2cp
    define_alias(
        qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
              hebrew|arabic|vietnamese)$/ix =>
          '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}'
    );

    # Common names for non-latin preferred MIME names
    define_alias(
        'ascii'    => 'US-ascii',
        'cyrillic' => 'iso-8859-5',
        'arabic'   => 'iso-8859-6',
        'greek'    => 'iso-8859-7',
        'hebrew'   => 'iso-8859-8',
        'thai'     => 'iso-8859-11',
    );

    # RT #20781
    define_alias( qr/\btis-?620\b/i => '"iso-8859-11"' );

    # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
    # And Microsoft has their own naming (again, surprisingly).
    # And windows-* is registered in IANA!
    define_alias(
        qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"' );

    # Sometimes seen with a leading zero.
    # define_alias( qr/\bcp037\b/i => '"cp37"');

    # Mac Mappings
    # predefined in *.ucm; unneeded
    # define_alias( qr/\bmacIcelandic$/i => '"macIceland"');
    define_alias( qr/^(?:x[_-])?mac[_-](.*)$/i => '"mac$1"' );

    # http://rt.cpan.org/Ticket/Display.html?id=36326
    define_alias( qr/^macintosh$/i => '"MacRoman"' );

    # https://rt.cpan.org/Ticket/Display.html?id=78125
    define_alias( qr/^macce$/i => '"MacCentralEurRoman"' );

    # Gone: these two are different!
    # define_alias( qr/\bmacRomanian$/i => '"macRumanian"');

    # Standardize on the dashed versions.
    define_alias( qr/\bkoi8[\s\-_]*([ru])$/i => '"koi8-$1"' );

    unless ($Encode::ON_EBCDIC) {

        # for Encode::CN
        define_alias( qr/\beuc.*cn$/i => '"euc-cn"' );
        define_alias( qr/\bcn.*euc$/i => '"euc-cn"' );

        # define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
        # CP936 doesn't have vendor-addon for GBK, so they're identical.
        define_alias( qr/^gbk$/i => '"cp936"' );

        # This fixes gb2312 vs. euc-cn confusion, practically
        define_alias( qr/\bGB[-_ ]?2312(?!-?raw)/i => '"euc-cn"' );

        # for Encode::JP
        define_alias( qr/\bjis$/i         => '"7bit-jis"' );
        define_alias( qr/\beuc.*jp$/i     => '"euc-jp"' );
        define_alias( qr/\bjp.*euc$/i     => '"euc-jp"' );
        define_alias( qr/\bujis$/i        => '"euc-jp"' );
        define_alias( qr/\bshift.*jis$/i  => '"shiftjis"' );
        define_alias( qr/\bsjis$/i        => '"shiftjis"' );
        define_alias( qr/\bwindows-31j$/i => '"cp932"' );

        # for Encode::KR
        define_alias( qr/\beuc.*kr$/i => '"euc-kr"' );
        define_alias( qr/\bkr.*euc$/i => '"euc-kr"' );

        # This fixes ksc5601 vs. euc-kr confusion, practically
        define_alias( qr/(?:x-)?uhc$/i         => '"cp949"' );
        define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' );
        define_alias( qr/\bks_c_5601-1987$/i   => '"cp949"' );

        # for Encode::TW
        define_alias( qr/\bbig-?5$/i              => '"big5-eten"' );
        define_alias( qr/\bbig5-?et(?:en)?$/i     => '"big5-eten"' );
        define_alias( qr/\btca[-_]?big5$/i        => '"big5-eten"' );
        define_alias( qr/\bbig5-?hk(?:scs)?$/i    => '"big5-hkscs"' );
        define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' );
    }

    # utf8 is blessed :)
    define_alias( qr/\bUTF-8$/i => '"utf-8-strict"' );

    # At last, Map white space and _ to '-'
    define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' );
}
1;
__END__
# TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
# TODO: HP-UX '15' encodings japanese15 korean15 roi15
# TODO: Cyrillic encoding ISO-IR-111 (useful?)
# TODO: Armenian encoding ARMSCII-8
# TODO: Hebrew encoding ISO-8859-8-1
# TODO: Thai encoding TCVN
# TODO: Vietnamese encodings VPS
# TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
# ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
# Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
# Kannada Khmer Korean Laotian Malayalam Mongolian
# Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
=head1 NAME
Encode::Alias - alias definitions to encodings
=head1 SYNOPSIS
use Encode;
use Encode::Alias;
define_alias( "newName" => ENCODING);
define_alias( qr/.../ => ENCODING);
define_alias( sub { return ENCODING if ...; } );
=head1 DESCRIPTION
Allows newName to be used as an alias for ENCODING. ENCODING may be
either the name of an encoding or an encoding object (as described
in L<Encode>).
Currently the first argument to define_alias() can be specified in the
following ways:
=over 4
=item As a simple string.
=item As a qr// compiled regular expression, e.g.:
define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
In this case, if I<ENCODING> is not a reference, it is C<eval>-ed
in order to allow C<$1> etc. to be substituted. The example is one
way to alias names as used in X11 fonts to the MIME names for the
iso-8859-* family. Note the double quotes inside the single quotes.
(You do not have to define this one yourself, because this example is predefined.)
If you are using a regex here, you have to use the quotes as shown or
it won't work. Also note that regex handling is tricky even for the
experienced. Use this feature with caution.
=item As a code reference, e.g.:
define_alias( sub {shift =~ /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
The same effect as the example above in a different way. The coderef
takes the alias name as an argument and returns a canonical name on
success or undef if not. Note the second argument is ignored if provided.
Use this with even more caution than the regex version.
=back
=head3 Changes in code reference aliasing
As of Encode 1.87, the older form
define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
no longer works.
Encode up to 1.86 internally used "local $_" to implement this older
form. But consider the code below;
use Encode;
$_ = "eeeee" ;
while (/(e)/g) {
my $utf = decode('aliased-encoding-name', $1);
print "position:",pos,"\n";
}
With Encode 1.86 and earlier this fails because of "local $_".
=head2 Alias overloading
You can override predefined aliases by simply applying define_alias().
The new alias is always evaluated first, and when necessary,
define_alias() flushes the internal cache to make the new definition
available.
# redirect SHIFT_JIS to MS/IBM Code Page 932, which is a
# superset of SHIFT_JIS
define_alias( qr/shift.*jis$/i => '"cp932"' );
define_alias( qr/sjis$/i => '"cp932"' );
If you want to zap all predefined aliases, you can use
Encode::Alias->undef_aliases;
to do so. And
Encode::Alias->init_aliases;
gets the factory settings back.
Note that define_alias() will not be able to override the canonical name
of encodings. Encodings are first looked up by canonical name before
potential aliases are tried.
=head1 SEE ALSO
L<Encode>, L<Encode::Supported>
=cut
|