/usr/share/perl5/Lingua/Preferred.pm is in liblingua-preferred-perl 0.2.4-4.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 | package Lingua::Preferred;
use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
require Exporter;
require AutoLoader;
# Use Log::TraceMessages if installed.
BEGIN {
eval { require Log::TraceMessages };
if ($@) {
*t = sub {};
*d = sub { '' };
}
else {
*t = \&Log::TraceMessages::t;
*d = \&Log::TraceMessages::d;
Log::TraceMessages::check_argv();
}
}
@ISA = qw(Exporter AutoLoader);
@EXPORT = qw(); @EXPORT_OK = qw(which_lang acceptable_lang);
$VERSION = '0.2.4';
=pod
=head1 NAME
Lingua::Preferred - Perl extension to choose a language
=head1 SYNOPSIS
use Lingua::Preferred qw(which_lang acceptable_lang);
my @wanted = qw(en de fr it de_CH);
my @available = qw(fr it de);
my $which = which_lang(\@wanted, \@available);
print "language $which is the best of those available\n";
foreach (qw(en_US fr nl de_DE)) {
print "language $_ is acceptable\n"
if acceptable_lang(\@wanted, $_);
}
=head1 DESCRIPTION
Often human-readable information is available in more than one
language. Which should you use? This module provides a way for the
user to specify possible languages in order of preference, and then to
pick the best language of those available. Different 'dialects' given
by the 'territory' part of the language specifier (such as en, en_GB,
and en_US) are also supported.
The routine C<which_lang()> picks the best language from a list of
alternatives. The arguments are:
=over
=item
a reference to a list of preferred languages (first is best). Here, a
language is a string like C<'en'> or C<'fr_CA'>. (C<'fr_*'> can also
be given - see below.) C<'C'> (named for the Unix 'C' locale) matches
any language.
=item
a reference to non-empty list of available languages. Here, a
language can be like C<'en'>, C<'en_CA'>, or C<undef> meaning 'unknown'.
=back
The return code is which language to use. This will always be an
element of the available languages list.
The cleverness of this module (if you can call it that) comes from
inferring implicit language preferences based on the explicit list
passed in. For example, if you say that en is acceptable, then en_IE
and en_DK will presumably be acceptable too (but not as good as just
plain en). If you give your language as en_US, then en is almost as
good, with the other dialects of en following soon afterwards.
If there is a tie between two choices, as when two dialects of the
same language are available and neither is explicitly preferred, or
when none of the available languages appears in the userE<39>s list,
then the choice appearing earlier in the available list is preferred.
Sometimes, the automatic inferring of related dialects is not what you
want, because a language dialect may be very different to the 'main'
language, for example Swiss German or some forms of English. For this
case, the special form 'XX_*' is available. If you dislike Mexican
Spanish (as a completely arbitrary example), then C<[ 'es', 'es_*',
'es_MX' ]> would rank this dialect below any other dialect of es (but
still acceptable). You donE<39>t have to explicitly list every other
dialect of Spanish before es_MX.
So for example, supposing C<@avail> contains the languages available:
=over
=item
You know English and prefer US English:
$which = which_lang([ 'en_US' ], \@avail);
=item
You know English and German, German/Germany is preferred:
$which = which_lang([ 'en', 'de_DE' ], \@avail);
=item
You know English and German, but preferably not Swiss German:
$which = which_lang([ 'en', 'de', 'de_*', 'de_CH' ], \@avail);
Here any dialect of German (eg de_DE, de_AT) is preferable to de_CH.
=cut
sub which_lang( $$ ) {
die 'usage: which_lang(listref of preferred langs, listref of available)'
if @_ != 2;
my ($pref, $avail) = @_;
t '$pref=' . d $pref;
t '$avail=' . d $avail;
my (%explicit, %implicit);
my $pos = 0;
# This seems like the best way to make block-nested subroutines
my $add_explicit = sub {
my $l = shift;
die "preferred language $l listed twice"
if defined $explicit{$l};
if (delete $implicit{$l}) { t "moved implicit $l to explicit" }
else { t "adding explicit $l" }
$explicit{$l} = $pos++;
};
my $add_implicit = sub {
my $l = shift;
if (defined $explicit{$l}) {
t "$l already explict, not adding implicitly";
}
else {
if (defined $implicit{$l}) { t "replacing implicit $l" }
else { t "adding implicit $l" }
$implicit{$l} = $pos++
}
};
foreach (@$pref) {
$add_explicit->($_);
if ($_ eq 'C') {
# Doesn't imply anything - C already matches every
# possible language.
#
}
elsif (/^[a-z][a-z]$/) {
# 'en' implies any dialect of 'en' also
$add_implicit->($_ . '_*');
}
elsif (/^([a-z][a-z])_([A-Z][A-Z])(?:\@.*)?$/) { # ignore @whatever
# 'en_GB' implies 'en', and secondly any other dialect
$add_implicit->($1);
$add_implicit->($1 . '_*');
}
elsif (/^([a-z][a-z])_\*$/) {
# 'en_*' doesn't imply anything - it shouldn't be used
# except in odd cases.
#
}
else { die "bad language '$_'" } # FIXME support 'English' etc
}
my %ranking = reverse (%explicit, %implicit);
if ($Log::TraceMessages::On) {
t 'ranking:';
foreach (sort { $a <=> $b } keys %ranking) {
t "$_\t$ranking{$_}";
}
}
my @langs = @ranking{sort { $a <=> $b } keys %ranking};
my %avail;
foreach (@$avail) {
next if not defined;
$avail{$_}++ && die "available language $_ listed twice";
}
while (defined (my $lang = shift @langs)) {
if ($lang eq 'C') {
# Match first available language.
return $avail->[0];
}
elsif ($lang =~ /^([a-z][a-z])_\*$/) {
# Any dialect of $1 (but not standard). Work through all
# of @$avail in order trying to find a match. (So there
# is a slight bias towards languages appearing earlier in
# @$avail.)
#
my $base_lang = $1;
AVAIL: foreach (@$avail) {
next if not defined;
if (/^\Q$base_lang\E_/) {
# Well, it matched... but maybe this dialect was
# explicitly specified with a lower priority.
#
foreach my $lower_lang (@langs) {
next AVAIL if (/^\Q$lower_lang\E$/);
}
return $_;
}
}
}
else {
# Exact match
return $lang if $avail{$lang};
}
}
# Couldn't find anything - pick first available language.
return $avail->[0];
}
=pod
Whereas C<which_lang()> picks the best language from a list of
alternatives, C<acceptable_lang()> answers whether a single
language is included (explicitly or implicitly) in the list of wanted
languages. It adds the implicit dialects in the same way.
=cut
sub acceptable_lang( $$ ) {
die 'usage: acceptable_lang(listref of wanted langs, lang)'
if @_ != 2;
my ($pref, $l) = @_;
t '$pref=' . d $pref;
t '$l=' . d $l;
# We just need to ignore the dialects and compare the main part.
my @pref = @$pref; # copy
$l =~ s/_.+//;
foreach (@pref) {
s/_.+//;
return 1 if $l eq $_;
}
return 0;
}
=pod
=head1 AUTHOR
Ed Avis, ed@membled.com
=head1 SEE ALSO
perl(1).
=cut
1;
__END__
|