/usr/share/perl5/Debbugs/UTF8.pm is in libdebbugs-perl 2.6.0.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 | # This module is part of debbugs, and is released
# under the terms of the GPL version 2, or any later
# version at your option.
# See the file README and COPYING for more information.
#
# Copyright 2013 by Don Armstrong <don@donarmstrong.com>.
package Debbugs::UTF8;
=head1 NAME
Debbugs::UTF8 -- Routines for handling conversion of charsets to UTF8
=head1 SYNOPSIS
use Debbugs::UTF8;
=head1 DESCRIPTION
This module contains routines which convert from various different
charsets to UTF8.
=head1 FUNCTIONS
=cut
use warnings;
use strict;
use vars qw($VERSION $DEBUG %EXPORT_TAGS @EXPORT_OK @EXPORT);
use Exporter qw(import);
BEGIN{
$VERSION = 1.00;
$DEBUG = 0 unless defined $DEBUG;
%EXPORT_TAGS = (utf8 => [qw(encode_utf8_structure encode_utf8_safely),
qw(convert_to_utf8 decode_utf8_safely)],
);
@EXPORT = (@{$EXPORT_TAGS{utf8}});
@EXPORT_OK = ();
Exporter::export_ok_tags(keys %EXPORT_TAGS);
$EXPORT_TAGS{all} = [@EXPORT_OK];
}
use Carp;
$Carp::Verbose = 1;
use Encode qw(encode_utf8 is_utf8 decode decode_utf8);
use Text::Iconv;
use Storable qw(dclone);
=head1 UTF-8
These functions are exported with the :utf8 tag
=head2 encode_utf8_structure
%newdata = encode_utf8_structure(%newdata);
Takes a complex data structure and encodes any strings with is_utf8
set into their constituent octets.
=cut
our $depth = 0;
sub encode_utf8_structure {
++$depth;
my @ret;
for $_ (@_) {
if (ref($_) eq 'HASH') {
push @ret, {encode_utf8_structure(%{$depth == 1 ? dclone($_):$_})};
}
elsif (ref($_) eq 'ARRAY') {
push @ret, [encode_utf8_structure(@{$depth == 1 ? dclone($_):$_})];
}
elsif (ref($_)) {
# we don't know how to handle non hash or non arrays
push @ret,$_;
}
else {
push @ret,encode_utf8_safely($_);
}
}
--$depth;
return @ret;
}
=head2 encode_utf8_safely
$octets = encode_utf8_safely($string);
Given a $string, returns the octet equivalent of $string if $string is
in perl's internal encoding; otherwise returns $string.
Silently returns REFs without encoding them. [If you want to deeply
encode REFs, see encode_utf8_structure.]
=cut
sub encode_utf8_safely{
my @ret;
for my $r (@_) {
if (not ref($r) and is_utf8($r)) {
$r = encode_utf8($r);
}
push @ret,$r;
}
return wantarray ? @ret : (@_ > 1 ? @ret : $ret[0]);
}
=head2 decode_utf8_safely
$string = decode_utf8_safely($octets);
Given $octets in UTF8, returns the perl-internal equivalent of $octets
if $octets does not have is_utf8 set; otherwise returns $octets.
Silently returns REFs without encoding them.
=cut
sub decode_utf8_safely{
my @ret;
for my $r (@_) {
if (not ref($r) and not is_utf8($r)) {
$r = decode_utf8($r);
}
push @ret, $r;
}
return wantarray ? @ret : (@_ > 1 ? @ret : $ret[0]);
}
=head2 convert_to_utf8
$utf8 = convert_to_utf8("text","charset");
=cut
sub convert_to_utf8 {
my ($data,$charset,$internal_call) = @_;
$internal_call //= 0;
if (is_utf8($data)) {
cluck("utf8 flag is set when calling convert_to_utf8");
return $data;
}
$charset = uc($charset//'UTF-8');
if ($charset eq 'RAW') {
croak("Charset must not be raw when calling convert_to_utf8");
}
## if the charset is unknown or unknown 8 bit, assume that it's UTF-8.
if ($charset =~ /unknown/i) {
$charset = 'UTF-8'
}
my $iconv_converter;
eval {
$iconv_converter = Text::Iconv->new($charset,"UTF-8") or
die "Unable to create converter for '$charset'";
};
if ($@) {
return undef if $internal_call;
warn $@;
# We weren't able to create the converter, so use Encode
# instead
return __fallback_convert_to_utf8($data,$charset);
}
my $converted_data = $iconv_converter->convert($data);
# if the conversion failed, retval will be undefined or perhaps
# -1.
my $retval = $iconv_converter->retval();
if (not defined $retval or
$retval < 0
) {
# try iso8559-1 first
if (not $internal_call) {
my $call_back_data = convert_to_utf8($data,'ISO8859-1',1);
# if there's an à (0xC3), it's probably something
# horrible, and we shouldn't try to convert it.
if (defined $call_back_data and $call_back_data !~ /\x{C3}/) {
return $call_back_data;
}
}
# Fallback to encode, which will probably also fail.
return __fallback_convert_to_utf8($data,$charset);
}
return decode("UTF-8",$converted_data);
}
# this returns data in perl's internal encoding
sub __fallback_convert_to_utf8 {
my ($data, $charset) = @_;
# raw data just gets returned (that's the charset WordDecorder
# uses when it doesn't know what to do)
return $data if $charset eq 'raw';
if (not defined $charset and not is_utf8($data)) {
warn ("Undefined charset, and string '$data' is not in perl's internal encoding");
return $data;
}
# lets assume everything that doesn't have a charset is utf8
$charset //= 'utf8';
## if the charset is unknown, assume it's UTF-8
if ($charset =~ /unknown/i) {
$charset = 'utf8';
}
my $result;
eval {
$result = decode($charset,$data,0);
};
if ($@) {
warn "Unable to decode charset; '$charset' and '$data': $@";
return $data;
}
return $result;
}
1;
__END__
|