This file is indexed.

/usr/share/perl5/Text/Unidecode.pm is in libtext-unidecode-perl 1.30-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
;;;;# -*-coding:utf-8;-*-                                               µ ← col73

# Module preamble: version requirements, package globals, and the export list.
require 5;
use 5.8.0;
package Text::Unidecode;
# Note: this assignment sits above "use strict", so strictures are not yet
#  in effect for it.
$Last_Modified =' Time-stamp: "2016-11-26 05:01:56 MST"';
use utf8;
use strict;
use integer; # vroom vroom!  (integer arithmetic from here on in this file)
# Package globals.  @Char holds the transliteration tables, indexed by
#  (code point >> 8); each loaded table is an arrayref indexed by
#  (code point & 255).
use vars qw($VERSION @ISA @EXPORT @Char $UNKNOWN $NULLMAP $TABLE_SIZE $Last_Modified
   $Note_Broken_Tables %Broken_Table_Size %Broken_Table_Copy
);
$VERSION = '1.30';
require Exporter;
@ISA = ('Exporter');
@EXPORT = ('unidecode');  # unidecode() is exported by default
# When true, t() records the size and a copy of any wrongly-sized table
#  in %Broken_Table_Size and %Broken_Table_Copy before repairing it.
$Note_Broken_Tables = 0;
# Define DEBUG as a constant 0, unless a DEBUG sub already exists in this
#  package by the time this file is compiled.
BEGIN { *DEBUG = sub () {0} unless defined &DEBUG }
# Placeholder output for characters we have no transliteration for.
$UNKNOWN = '[?] ';
# Number of entries every table must have: one per value of the low 8 bits.
$TABLE_SIZE = 256;
$NULLMAP = [( $UNKNOWN ) x $TABLE_SIZE];  # for blocks we can't load

#--------------------------------------------------------------------------
{
  # Load-time self-check: every 7-bit character must transliterate to itself.
  #
  # The test string is built with chr() over a *numeric* range.  A string
  #  range such as "\x00" .. "\x7F" does not do the job: the range operator
  #  only auto-increments strings that match /^[a-zA-Z]*[0-9]*$/, so for
  #  "\x00" it returns just that one character, and the remaining 127
  #  characters would go untested.
  my $x = join '', map { chr($_) } 0x00 .. 0x7F;
  die "the 7-bit purity test fails!" unless $x eq unidecode($x);
}

#--------------------------------------------------------------------------

sub unidecode {
  # Transliterate each argument into US-ASCII.
  #
  # What happens to the arguments depends on the calling context:
  #   void   -- the caller's own variables are altered in place, and
  #             nothing is returned.
  #   list   -- transliterated copies are returned, one per argument.
  #   scalar -- one transliterated copy is returned: of the sole argument,
  #             or of all the arguments joined together.
  # Undefined arguments are passed over untouched.

  my $context = wantarray;  # true: list.  false-but-defined: scalar.  undef: void.

  # Nothing coming in?  Then hand back nothing, in the shape asked for.
  if( !@_ ) {
    return $context ? () : '';
  }

  # Outside of void context, detach @_ from the caller's variables, so that
  #  the substitution below works on private copies.  In void context the
  #  aliasing is kept on purpose: it is what makes the in-place update work.
  @_ = map { $_ } @_  if defined $context;

  for my $text (@_) {
    defined $text or next;

    # UTF-16 surrogate characters can set off (potentially fatal) warnings
    #  when running under perl -w, so hush that category here.
    # This is per https://rt.cpan.org/Ticket/Display.html?id=97456
    no warnings 'utf8';

    # The heart of it all: each non-ASCII character 0xABCD is replaced with
    #  $Char[0xAB][0xCD].  When table 0xAB isn't in @Char yet, t() is asked
    #  for it; t() loads the table and returns it.
    $text =~ s{([^\x00-\x7f])}{
      my $codepoint = ord $1;
      my $table     = $Char[ $codepoint >> 8 ] || t($1);
      $table->[ $codepoint & 255 ]
    }egs;
  }

  return      unless defined $context;  # void context: all done, in place
  return @_   if $context;              # list context: return the copies

  # Scalar context: the one item as-is, or else everything glued together.
  return @_ == 1 ? $_[0] : join('', @_);
}

#======================================================================

sub make_placeholder_map {
  # Returns a reference to a brand-new array of $TABLE_SIZE entries,
  #  every one of them being the $UNKNOWN placeholder string.
  my @placeholders = ( $UNKNOWN ) x $TABLE_SIZE;
  return \@placeholders;
}
sub make_placeholder_map_nulls {
  # Returns a reference to a brand-new array of $TABLE_SIZE entries,
  #  every one of them being the empty string.
  my @empties = ( "" ) x $TABLE_SIZE;
  return \@empties;
}

#======================================================================

sub t {   # "t" is for "t"able.
  # Given a character, return the table (an arrayref of $TABLE_SIZE
  #  strings) for the bank that the character lives in, loading that bank
  #  first if @Char doesn't have it yet.  Whatever gets returned is also
  #  what ends up stored in $Char[$bank], so the loading work is done at
  #  most once per bank.
  my($char) = @_;
  my $bank = ord($char) >> 8;

  my $cached = $Char[$bank];
  return $cached if $cached;

  load_bank($bank);

  # Did that leave an arrayref in our slot?  If not, fall back on the
  #  shared all-$UNKNOWN map.
  my $table = $Char[$bank];
  unless( ref($table || '') eq 'ARRAY' ) {
    DEBUG > 1 and print
      " Loading failed for bank $bank (err $@).  Using null map.\n";
    $Char[$bank] = $NULLMAP;
    return $NULLMAP;
  }

  DEBUG > 1 and print " Loading succeeded.\n";

  # Make sure the table has exactly $TABLE_SIZE entries, repairing it if not.
  my $count = @$table;
  if( $count != $TABLE_SIZE ) {

    # Keep evidence of the broken table, if we were asked to.
    if($Note_Broken_Tables) {
      $Broken_Table_Size{$bank} = $count;
      $Broken_Table_Copy{$bank} = [ @$table ];
    }

    if( $count > $TABLE_SIZE ) {
      DEBUG and print "Bank $bank is too large-- it has ", $count,
        " entries in it.  Pruning.\n";
      # Two-argument splice: drop everything from the first overage
      #  entry onward.
      splice @$table, $TABLE_SIZE;

    } elsif( $count < $TABLE_SIZE ) {
      DEBUG and print "Bank $bank is too small-- it has ", $count,
        " entries in it.  Now padding it.\n";
      if( $count == 0 ) {
        DEBUG and print "  (Yes, ZERO entries!)\n";
      }
      # Make up the deficit with $UNKNOWN placeholders.
      push @$table, ( $UNKNOWN ) x ( $TABLE_SIZE - $count );

    } else {
      die "UNREACHABLE CODE HERE (INSANE)";
    }
  }

  # Any undefined slot becomes the empty string.
  for my $slot ( 0 .. $TABLE_SIZE - 1 ) {
    next if defined $table->[$slot];
    DEBUG and printf "Undef at position %d in block x%02x\n",
      $slot, $bank;
    $table->[$slot] = '';
  }

  return $table;
}

#-----------------------------------------------------------------------

our $eval_loaded_okay;

sub load_bank {

  # Try to load the data module for one bank of characters.
  #
  # Takes:   the bank number, as a plain integer (i.e., code point >> 8).
  # Returns: 1 if the module for that bank loaded, 0 if it did not.
  #
  # The module's name is put together at runtime as
  #  <this package>::xNN, with NN being the bank number in lowercase hex,
  #  which is why the require has to go through a string eval.
  #
  # This is in its own sub, for sake of sweeping the scary thing
  #  (namely, a call to eval) under the rug.
  # I.e., to paraphrase what Larry Wall once said to me: if
  #  you're going to do something odd, maybe you should do it
  #  in private.

  my($banknum) = @_;  # just as an integer value

  # The format has two conversions, so it needs its two arguments.
  DEBUG and printf
      "# Eval-loading %s::x%02x ...\n",
      __PACKAGE__, $banknum;

  # The eval'd code sets this flag only once its require has returned,
  #  so the flag still being 0 afterwards means the require died.
  $eval_loaded_okay = 0;
  my $code = 
      sprintf( "require %s::x%02x; \$eval_loaded_okay = 1;\n",
               __PACKAGE__,
	       $banknum);

  {
    # Keep any die-handler the program has installed from firing
    #  just because a bank's module is missing.
    local $SIG{'__DIE__'};
    eval($code);
    # $@ is deliberately left alone: t() reports it in its debug output.
  }

  return 1 if $eval_loaded_okay;
  return 0;
}

#======================================================================

1;
__END__

=encoding utf8

=head1 NAME

Text::Unidecode -- plain ASCII transliterations of Unicode text

=head1 SYNOPSIS

  use utf8;
  use Text::Unidecode;
  print unidecode(
    "北亰\n"
    # Chinese characters for Beijing (U+5317 U+4EB0)
  );
  
  # That prints: Bei Jing 

=head1 DESCRIPTION

It often happens that you have non-Roman text data in Unicode, but
you can't display it-- usually because you're trying to
show it to a user via an application that doesn't support Unicode,
or because the fonts you need aren't accessible.  You could
represent the Unicode characters as "???????" or
"\15BA\15A0\1610...", but that's nearly useless to the user who
actually wants to read what the text says.

What Text::Unidecode provides is a function, C<unidecode(...)> that
takes Unicode data and tries to represent it in US-ASCII characters
(i.e., the universally displayable characters between 0x00 and
0x7F).  The representation is
almost always an attempt at I<transliteration>-- i.e., conveying,
in Roman letters, the pronunciation expressed by the text in
some other writing system.  (See the example in the synopsis.)


NOTE:

To make sure your perldoc/Pod viewing setup for viewing this page is
working: The six-letter word "résumé" should look like "resume" with
an "/" accent on each "e".

For further tests, and help if that doesn't work, see below,
L</A POD ENCODING TEST>.


=head1 DESIGN PHILOSOPHY

Unidecode's ability to transliterate from a given language is limited
by two factors:

=over

=item * The amount and quality of data in the written form of the
original language

So if you have Hebrew data
that has no vowel points in it, then Unidecode cannot guess what
vowels should appear in a pronunciation.
S f y hv n vwls n th npt, y wn't gt ny vwls
n th tpt.  (This is a specific application of the general principle
of "Garbage In, Garbage Out".)

=item * Basic limitations in the Unidecode design

Writing a real and clever transliteration algorithm for any single
language usually requires a lot of time, and at least a passable
knowledge of the language involved.  But Unicode text can convey
more languages than I could possibly learn (much less create a
transliterator for) in the entire rest of my lifetime.  So I put
a cap on how intelligent Unidecode could be, by insisting that
it support only context-I<in>sensitive transliteration.  That means
missing the finer details of any given writing system,
while still hopefully being useful.

=back

Unidecode, in other words, is quick and
dirty.  Sometimes the output is not so dirty at all:
Russian and Greek seem to work passably; and
while Thaana (Divehi, AKA Maldivian) is a definitely non-Western
writing system, setting up a mapping from it to Roman letters
seems to work pretty well.  But sometimes the output is I<very
dirty:> Unidecode does quite badly on Japanese and Thai.

If you want a smarter transliteration for a particular language
than Unidecode provides, then you should look for (or write)
a transliteration algorithm specific to that language, and apply
it instead of (or at least before) applying Unidecode.

In other words, Unidecode's
approach is broad (knowing about dozens of writing systems), but
shallow (not being meticulous about any of them).

=head1 FUNCTIONS

Text::Unidecode provides one function, C<unidecode(...)>, which
is exported by default.  It can be used in a variety of calling contexts:

=over

=item C<$out = unidecode( $in );> # scalar context

This returns a copy of $in, transliterated.

=item C<$out = unidecode( @in );> # scalar context

This is the same as C<$out = unidecode(join "", @in);>

=item C<@out = unidecode( @in );> # list context

This returns a list consisting of copies of @in, each transliterated.  This
is the same as C<@out = map scalar(unidecode($_)), @in;>

=item C<unidecode( @items );> # void context

=item C<unidecode( @bar, $foo, @baz );> # void context

Each item on input is replaced with its transliteration.  This
is the same as C<for(@bar, $foo, @baz) { $_ = unidecode($_) }>

=back

You should make a minimum of assumptions about the output of
C<unidecode(...)>.  For example, if you assume an all-alphabetic
(Unicode) string passed to C<unidecode(...)> will return an all-alphabetic
string, you're wrong-- some alphabetic Unicode characters are
transliterated as strings containing punctuation (e.g., the
Armenian letter "Թ" (U+0539) currently transliterates as "T`"
-- capital-T then a backtick).

However, these are the assumptions you I<can> make:

=over

=item *

Each character 0x0000 - 0x007F transliterates as itself.  That is,
C<unidecode(...)> is 7-bit pure.

=item *

The output of C<unidecode(...)> always consists entirely of US-ASCII
characters-- i.e., characters 0x0000 - 0x007F.

=item *

All Unicode characters translate to a sequence of (any number of)
characters that are newline ("\n") or in the range 0x0020-0x007E.  That
is, no Unicode character translates to "\x01", for example.  (Although if
you have a "\x01" on input, you'll get a "\x01" in output.)

=item *

Yes, some transliterations produce a "\n" but it's just a few, and
only with good reason.  Note that the value of newline ("\n") varies
from platform to platform-- see L<perlport>.

=item *

Some Unicode characters may transliterate to nothing (i.e., empty string).

=item *

Very many Unicode characters transliterate to multi-character sequences.
E.g., Unihan character U+5317, "北", transliterates as the four-character string
"Bei ".

=item *

Within these constraints, I<I may change> the transliteration of characters
in future versions.  For example, if someone convinces me
that the Armenian letter "Թ", currently transliterated as "T`", would
be better transliterated as "D", I I<may> well make that change.

=item *

Unfortunately, there are many characters that Unidecode doesn't know a
transliteration for.  This is generally because the character has been
added since I last revised the Unidecode data tables.  I'm I<always>
catching up!

=back

=head1 DESIGN GOALS AND CONSTRAINTS

Text::Unidecode is meant to be a transliterator of last resort,
to be used once you've decided that you can't just display the
Unicode data as is, I<and once you've decided you don't have a
more clever, language-specific transliterator available,> or once
you've I<already applied> smarter algorithms or mappings that you prefer
and you now just want Unidecode to do cleanup.

Unidecode
transliterates context-insensitively-- that is, a given character is
replaced with the same US-ASCII (7-bit ASCII) character or characters,
no matter what the surrounding characters are.

The main reason I'm making Text::Unidecode work with only
context-insensitive substitution is that it's fast, dumb, and
straightforward enough to be feasible.  It doesn't tax my
(quite limited) knowledge of world languages.  It doesn't require
me writing a hundred lines of code to get the Thai syllabification
right (and never knowing whether I've gotten it wrong, because I
don't know Thai), or spending a year trying to get Text::Unidecode
to use the ChaSen algorithm for Japanese, or trying to write heuristics
for telling the difference between Japanese, Chinese, or Korean, so
it knows how to transliterate any given Uni-Han glyph.  And
moreover, context-insensitive substitution is still mostly useful,
but still clearly couldn't be mistaken for authoritative.

Text::Unidecode is an example of the 80/20 rule in
action-- you get 80% of the usefulness using just 20% of a
"real" solution.

A "real" approach to transliteration for any given language can
involve such increasingly tricky contextual factors as these:

=over

=item The preceding / following character(s)

What a given symbol "X" means, could
depend on whether it's followed by a consonant, or by vowel, or
by some diacritic character.

=item Syllables

A character "X" at end of a syllable could mean something
different from when it's at the start-- which is especially problematic
when the language involved doesn't explicitly mark where one syllable
stops and the next starts.

=item Parts of speech

What "X" sounds like at the end of a word,
depends on whether that word is a noun, or a verb, or what.

=item Meaning

By semantic context, you can tell that this ideogram "X" means "shoe"
(pronounced one way) and not "time" (pronounced another),
and that's how you know to transliterate it one way instead of the other.

=item Origin of the word

"X" means one thing in loanwords and/or placenames (and
derivatives thereof), and another in native words.

=item "It's just that way"

"X" normally makes
the /X/ sound, except for this list of seventy exceptions (and words based
on them, sometimes indirectly).  Or: you never can tell which of the three
ways to pronounce "X" this word actually uses; you just have to know
which it is, so keep a dictionary on hand!

=item Language

The character "X" is actually used in several different languages, and you
have to figure out which you're looking at before you can determine how
to transliterate it.

=back

Out of a desire to avoid being mired in I<any> of these kinds of
contextual factors, I chose to exclude I<all of them> and just stick
with context-insensitive replacement.


=head1 A POD ENCODING TEST

=over

=item *

"Brontë" is six characters that should look like "Bronte", but
with double-dots on the "e" character.

=item *

"Résumé" is six characters that should look like "Resume", but
with /-shaped accents on the "e" characters.

=item *

"læti" should be I<four> letters long-- the second letter should not
be two letters "ae", but should be a single letter that
looks like an "a" entirely fused with an "e".

=item *

"χρονος" is six Greek characters that should look kind of like: xpovoc

=item *

"КАК ВАС ЗОВУТ" is three short Russian words that should look a
lot like: KAK BAC 3OBYT

=item *

"ടധ" is two Malayalam characters that should look like: sw

=item *

"丫二十一" is four Chinese characters that should look like: C<Y=+->

=item *

"Hello" is five characters that should look like: Hello

=back

If all of those come out right, your Pod viewing setup is working
fine-- welcome to the 2010s!  If those are full of garbage characters,
consider viewing this page as HTML at
L<https://metacpan.org/pod/Text::Unidecode>
or
L<http://search.cpan.org/perldoc?Text::Unidecode>


If things look mostly okay, but the Malayalam and/or the Chinese are
just question-marks or empty boxes, it's probably just that your
computer lacks the fonts for those.

=head1 TODO

Lots:

* Rebuild the Unihan database.  (Talk about hitting a moving target!)

* Add tone-numbers for Mandarin hanzi?  Namely: In Unihan, when tone
marks are present (like in "kMandarin: dào"), should I continue to
transliterate as just "Dao", or should I put in the tone number:
"Dao4"?  It would be pretty jarring to have digits appear where
previously there was just alphabetic stuff-- But tone numbers
make Chinese more readable.
(I have a clever idea about doing this, for Unidecode v2 or v3.)

* Start dealing with characters over U+FFFF.  Cuneiform! Emojis! Whatever!

* Fill in all the little characters that have crept into the Misc Symbols
Etc blocks.

* More things that need tending to are detailed in the TODO.txt file,
included in this distribution.  Normal installs probably don't leave
the TODO.txt lying around, but if nothing else, you can see it at
L<http://search.cpan.org/search?dist=Text::Unidecode>

=head1 MOTTO

The Text::Unidecode motto is:

  It's better than nothing!

...in I<both> meanings: 1) seeing the output of C<unidecode(...)> is
better than just having all font-unavailable Unicode characters
replaced with "?"'s, or rendered as gibberish; and 2) it's the
worst, i.e., there's nothing that Text::Unidecode's algorithm is
better than.  All sensible transliteration algorithms (like for
German, see below) are going to be smarter than Unidecode's.

=head1 WHEN YOU DON'T LIKE WHAT UNIDECODE DOES

I will repeat the above, because some people miss it:

Text::Unidecode is meant to be a transliterator of I<last resort,>
to be used once you've decided that you can't just display the
Unicode data as is, I<and once you've decided you don't have a
more clever, language-specific transliterator available>-- or once
you've I<already applied> a smarter algorithm and now just want Unidecode
to do cleanup.

In other words, when you don't like what Unidecode does, I<do it
yourself.>  Really, that's what the above says.  Here's how
you would do this for German, for example:

In German, there's the typographical convention that an umlaut (the
double-dots on: ä ö ü) can be written as an "-e", like with "Schön"
becoming "Schoen".  But Unidecode doesn't do that-- I have Unidecode
simply drop the umlaut accent and give back "Schon".

(I chose this not because I'm a big meanie, but because
I<generally> changing "ü" to "ue" is disastrous for all text
that's I<not in German>.  Finnish "Hyvää päivää" would turn
into "Hyvaeae paeivaeae".  And I discourage you from being I<yet
another> German who emails me, trying to impel me to consider
a typographical nicety of German to be more important than
I<all other languages>.)

If you know that the text you're handling is probably in German, and
you want to apply the "umlaut becomes -e" rule, here's how to do it
for yourself (and then use Unidecode as I<the fallback> afterwards):

  use utf8;  # <-- probably necessary.

  our( %German_Characters ) = qw(
   Ä AE   ä ae
   Ö OE   ö oe
   Ü UE   ü ue
   ß ss 
  );
  
  use Text::Unidecode qw(unidecode);
  
  sub german_to_ascii {
    my($german_text) = @_;
    
    $german_text =~
      s/([ÄäÖöÜüß])/$German_Characters{$1}/g;
    
    # And now, as a *fallthrough*:
    $german_text = unidecode( $german_text );
    return $german_text;
  }

To pick another example, here's something that's not about a
specific language, but simply having a preference that may or
may not agree with Unidecode's (i.e., mine).  Consider the "¥"
symbol.  Unidecode changes that to "Y=".  If you want "¥" as
"YEN", then...

  use Text::Unidecode qw(unidecode);

  sub my_favorite_unidecode {
    my($text) = @_;
    
    $text =~ s/¥/YEN/g;
    
    # ...and anything else you like, such as:
    $text =~ s/€/Euro/g;
    
    # And then, as a fallback,...
    $text = unidecode($text);
     
    return $text;    
  }

Then if you do:

  print my_favorite_unidecode("You just won ¥250,000 and €40,000!!!");

...you'll get:

  You just won YEN250,000 and Euro40,000!!!

...just as you like it.

(By the way, the reason I<I> don't have Unidecode just turn "¥" into "YEN"
is that the same symbol also stands for yuan, the Chinese
currency.  A "Y=" is nicely, I<safely> neutral as to whether
we're talking about yen or yuan-- Japan, or China.)

Another example: for hanzi/kanji/hanja, I have designed
Unidecode to transliterate according to the value that that
character has in Mandarin (otherwise Cantonese,...).  Some
users have complained that applying Unidecode to Japanese
produces gibberish.

To make a long story short: transliterating from Japanese is
I<difficult> and it requires a I<lot> of context-sensitivity.
If you have text that you're fairly sure is in
Japanese, you're going to have to use a Japanese-specific
algorithm to transliterate Japanese into ASCII.  (And then
you can call Unidecode on the output from that-- it is useful
for, for example, turning fullwidth characters into
their normal (ASCII) forms.)

(Note, as of August 2016: I have titanic but tentative plans for
making the value of Unihan characters be something you could set
parameters for at runtime, in changing the order of "Mandarin else
Cantonese else..." in the value retrieval.  Currently that preference
list is hardwired on my end, at module-build time.  Other options I'm
considering allowing for: whether the Mandarin and Cantonese values
should have the tone numbers on them; whether every Unihan value
should have a terminal space; and maybe other clever stuff I haven't
thought of yet.)


=head1 CAVEATS

If you get really implausible nonsense out of C<unidecode(...)>, make
sure that the input data really is a utf8 string.  See
L<perlunicode> and L<perlunitut>.

I<Unidecode will work disastrously badly on Japanese.> That's because
Japanese is very very hard.  To extend the Unidecode motto,
Unidecode is better than nothing, and with Japanese, I<just barely!>

On pure Mandarin, Unidecode will frequently give odd values--
that's because a single hanzi can have several readings, and Unidecode
only knows what the Unihan database says is the most common one.


=head1 THANKS

Thanks to (in only the sloppiest of sorta-chronological order): 
Jordan Lachler, Harald Tveit Alvestrand, Melissa Axelrod,
Abhijit Menon-Sen, Mark-Jason Dominus, Joe Johnston,
Conrad Heiney, fileformat.info,
Philip Newton, 唐鳳, Tomaž Šolc, Mike Doherty, JT Smith and the
MadMongers, Arden Ogg, Craig Copris,
David Cusimano, Brendan Byrd, Hex Martin,
and
I<many>
other pals who have helped with the ideas or values for Unidecode's
transliterations, or whose help has been in the
secret F5 tornado that constitutes the internals of Unidecode's
implementation.

And thank you to the many people who have encouraged me to plug away
at this project.  A decade went by before I had any idea that more
than about 4 or 5 people were using or getting any value
out of Unidecode.  I am told that actually
my figure was missing some zeroes on the end!


=head1 PORTS

Some wonderful people have ported Unidecode to other languages!

=over

=item *

Python: L<https://pypi.python.org/pypi/Unidecode>

=item *

PHP: L<https://github.com/silverstripe-labs/silverstripe-unidecode>

=item *

Ruby: L<http://www.rubydoc.info/gems/unidecode/1.0.0/frames>

=item *

JavaScript: L<https://www.npmjs.org/package/unidecode>

=item *

Java: L<https://github.com/xuender/unidecode>

=back

I can't vouch for the details of each port, but these are clever
people, so I'm sure they did a fine job.


=head1 SEE ALSO

An article I wrote for I<The Perl Journal> about
Unidecode:  L<http://interglacial.com/tpj/22/>
(B<READ IT!>)

Jukka Korpela's L<http://www.cs.tut.fi/~jkorpela/fui.html8> which is
brilliantly useful, and its code is brilliant (so, view source!).  I
was I<kinda> thinking about maybe doing something I<sort of> like that
for the v2.x versions of Unidecode-- but now he's got me convinced that
I should go right ahead.

Tom Christiansen's
I<Perl Unicode Cookbook>,
L<http://www.perl.com/pub/2012/04/perlunicook-standard-preamble.html>

Unicode Consortium: L<http://www.unicode.org/>

Searchable Unihan database:
L<http://www.unicode.org/cgi-bin/GetUnihanData.pl>

Geoffrey Sampson.  1990.  I<Writing Systems: A Linguistic Introduction.>
ISBN: 0804717567

Randall K. Barry (editor).  1997.  I<ALA-LC Romanization Tables:
Transliteration Schemes for Non-Roman Scripts.>
ISBN: 0844409405
[ALA is the American Library Association; LC is the Library of
Congress.]

Rupert Snell.  2000.  I<Beginner's Hindi Script (Teach Yourself
Books).>  ISBN: 0658009109

=head1 LICENSE

Copyright (c) 2001, 2014, 2015, 2016 Sean M. Burke.

Unidecode is distributed under the Perl Artistic License
( L<perlartistic> ), namely:

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.

=head1 DISCLAIMER

Much of Text::Unidecode's internal data is based on data from The
Unicode Consortium, with which I am unaffiliated.  A good deal of the
internal data comes from suggestions that have been contributed by
people other than myself.

The views and conclusions contained in my software and documentation
are my own-- they should not be interpreted as representing official
policies, either expressed or implied, of The Unicode Consortium; nor
should they be interpreted as necessarily the views or conclusions of
people who have contributed to this project.

Moreover, I discourage you from inferring that choices that I've made
in Unidecode reflect political or linguistic prejudices on my
part.  Just because Unidecode doesn't do great on your language,
or just because it might seem to do better on some other
language, please don't think I'm out to get you!

=head1 AUTHOR

Your pal, Sean M. Burke C<sburke@cpan.org>

=head1 O HAI!

If you're using Unidecode for anything interesting, be cool and
email me, I'm always curious what people use this for.  (The
answers so far have surprised me!)

=cut

#################### SCOOBIE SNACK ####################

Lest there be any REMAINING doubt that the Unicode Consortium has
a sense of humor, the CDROM that comes with /The Unicode Standard,
Version 3.0/ book, has an audio track of the Unicode anthem [!].
The lyrics are:

	Unicode, Oh Unicode!
	--------------------

	Oh, beautiful for Uni-Han,
	for spacious User Zone!
	For rampant scripts of India
	and polar Nunavut!

	  Chorus:
		Unicode, Oh Unicode!
		May all your code points shine forever
		and your beacon light the world!

	Oh, marvelous for sixteen bits,
	for precious surrogates!
	For Bi-Di algorithm dear
	and stalwart I-P-A!

	Oh, glorious for Hangul fair,
	for symbols mathematical!
	For myriad exotic scripts
	and punctuation we adore!

# End.