/usr/share/perl5/PPI/Token/BOM.pm is in libppi-perl 1.220-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
package PPI::Token::BOM;
=pod
=head1 NAME
PPI::Token::BOM - Tokens representing Unicode byte order marks
=head1 INHERITANCE
PPI::Token::BOM
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
This is a special token in that it can only occur at the beginning of
documents. If a byte order mark occurs elsewhere in a file, it should
be treated as L<PPI::Token::Whitespace>. We recognize the byte order
marks identified at this URL:
L<http://www.unicode.org/faq/utf_bom.html#BOM>
UTF-32, big-endian 00 00 FE FF
UTF-32, little-endian FF FE 00 00
UTF-16, big-endian FE FF
UTF-16, little-endian FF FE
UTF-8 EF BB BF
Note that as of this writing, PPI only has support for UTF-8
(namely, in POD and strings) and no support for UTF-16 or UTF-32. The
BOMs of the latter two are recognized for completeness only: a document
that starts with one of them is reported as "not supported".
The BOM is considered non-significant, like white space.
=head1 METHODS
There are no additional methods beyond those provided by the parent
L<PPI::Token> and L<PPI::Element> classes.
=cut
use strict;
use PPI::Token ();
use vars qw{$VERSION @ISA};
# Both package globals are assigned inside BEGIN, i.e. at compile time:
# the parent class for method resolution and the module's version string.
BEGIN {
    @ISA     = ('PPI::Token');
    $VERSION = '1.220';
}
# A byte order mark is never significant content, like white space:
# always answers with the empty string (false).
sub significant() {
    '';
}
#####################################################################
# Parsing Methods
# Lookup table from each recognized byte order mark (raw bytes) to the name
# of the encoding it announces. The name decides whether the mark is
# accepted and is used to build the error message when it is not.
my %bom_types = (
    "\x00\x00\xfe\xff" => 'UTF-32',    # big-endian
    "\xff\xfe\x00\x00" => 'UTF-32',    # little-endian
    "\xfe\xff"         => 'UTF-16',    # big-endian
    "\xff\xfe"         => 'UTF-16',    # little-endian
    "\xef\xbb\xbf"     => 'UTF-8',
);

# Tokenizer hook: inspect the start of the tokenizer's current line for a
# byte order mark, then hand the line over to PPI::Token::Whitespace.
#
# Arguments:
#   $_[0] - the invocant (unused)
#   $_[1] - the tokenizer object; its {line} entry is read, and its
#           {line_cursor} and {class} entries are updated here
#
# Returns:
#   - the result of $t->_error(...) when the line starts with a UTF-16 or
#     UTF-32 mark,
#   - undef when $t->_new_token(...) returns false,
#   - otherwise whatever PPI::Token::Whitespace's handler returns.
sub __TOKENIZER__on_line_start {
    my $t = $_[1];

    # Match against a lexical copy of the line. Assigning the line to the
    # global $_ would overwrite the caller's $_, and with it any array
    # element or variable that $_ is aliased to (e.g. inside foreach/map).
    # NOTE(review): the delegated handler is passed $t explicitly; confirm
    # it does not expect $_ to have been preloaded with the line.
    my $line = $t->{line};

    # The four-byte UTF-32 little-endian mark is listed before the two-byte
    # UTF-16 little-endian mark because the latter is a prefix of the former.
    if (
        $line =~ m/^(\x00\x00\xfe\xff |    # UTF-32, big-endian
                     \xff\xfe\x00\x00 |    # UTF-32, little-endian
                     \xfe\xff         |    # UTF-16, big-endian
                     \xff\xfe         |    # UTF-16, little-endian
                     \xef\xbb\xbf)         # UTF-8
                  /xs
      )
    {
        my $bom      = $1;
        my $encoding = $bom_types{$bom};

        if ( $encoding ne 'UTF-8' ) {
            return $t->_error("$encoding is not supported");
        }

        # Register a 'BOM' token holding the mark and advance the line
        # cursor by the mark's length.
        $t->_new_token( 'BOM', $bom ) or return undef;
        $t->{line_cursor} += length $bom;
    }

    # Continue just as if there was no BOM
    $t->{class} = 'PPI::Token::Whitespace';
    return $t->{class}->__TOKENIZER__on_line_start($t);
}
1;
=pod
=head1 SUPPORT
See the L<support section|PPI/SUPPORT> in the main module.
=head1 AUTHOR
Chris Dolan E<lt>cdolan@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2001 - 2011 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut
|