/usr/bin/xml_pp is in xml-twig-tools 1:3.50-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 | #!/usr/bin/perl -w
# $Id: /xmltwig/trunk/tools/xml_pp/xml_pp 32 2008-01-18T13:11:52.128782Z mrodrigu $
use strict;
use XML::Twig;
use File::Temp qw/tempfile/;
use File::Basename qw/dirname/;
my @styles= XML::Twig->_pretty_print_styles; # from XML::Twig
my $styles= join '|', @styles; # for usage
my %styles= map { $_ => 1} @styles; # to check option
my $DEFAULT_STYLE= 'indented';
my $USAGE= "usage: $0 [-v] [-i<extension>] [-s ($styles)] [-p <tag(s)>] [-e <encoding>] [-l] [-f <file>] [<files>]";
# because of the -i.bak option I don't think I can use one of the core
# option processing modules, so it's custom handling and no clusterization :--(
my %opt= process_options(); # changes @ARGV
my @twig_options=( pretty_print => $opt{style},
error_context => 1,
);
if( $opt{preserve_space_in})
{ push @twig_options, keep_spaces_in => $opt{preserve_space_in};}
if( $opt{encoding})
{ push @twig_options, output_encoding => $opt{encoding};
}
else
{ push @twig_options, keep_encoding => 1; }
# in normal (ie not -l) mode tags are output as soon as possible
push @twig_options, twig_handlers => { _all_ => sub { $_[0]->flush } }
unless( $opt{load});
if( @ARGV)
{ foreach my $file (@ARGV)
{ print STDERR "$file\n" if( $opt{verbose});
my $t= XML::Twig->new( @twig_options);
my $tempfile;
if( $opt{in_place})
{ (undef, $tempfile)= tempfile( DIR => dirname( $file)) or die "cannot create tempfile for $file: $!\n" ;
open( PP_OUTPUT, ">$tempfile") or die "cannot create tempfile $tempfile: $!";
select PP_OUTPUT;
}
$t= $t->safe_parsefile( $file);
if( $t)
{ if( $opt{load}) { $t->print; }
select STDOUT;
if( $opt{in_place})
{ close PP_OUTPUT;
my $mode= mode( $file);
if( $opt{backup})
{ my $backup= backup( $file, $opt{backup});
rename( $file, $backup) or die "cannot create backup file $backup: $!";
}
rename( $tempfile, $file) or die "cannot overwrite file $file: $!";
if( $mode ne mode( $file)) { chmod $mode, $file or die "cannot set $file mode to $mode: $!"; }
}
}
else
{ if( defined $tempfile)
{ unlink $tempfile or die "cannot unlink temp file $tempfile: $!"; }
die $@;
}
}
}
else
{ my $t= XML::Twig->new( @twig_options);
$t->parse( \*STDIN);
if( $opt{load}) { $t->print; }
}
sub mode
{ my( $file)= @_;
return (stat($file))[2];
}
sub process_options
{ my %opt;
while( @ARGV && ($ARGV[0]=~ m{^-}) )
{ my $opt= shift @ARGV;
if( ($opt eq '-v') || ($opt eq '--verbose') )
{ die $USAGE if( $opt{verbose});
$opt{verbose}= 1;
}
elsif( ($opt eq '-s') || ($opt eq '--style') )
{ die $USAGE if( $opt{style});
$opt{style}= shift @ARGV;
die $USAGE unless( $styles{$opt{style}});
}
elsif( ($opt=~ m{^-i(.*)$}) || ($opt=~ m{^--in_place(.*)$}) )
{ die $USAGE if( $opt{in_place});
$opt{in_place}= 1;
$opt{backup}= $1 ||'';
}
elsif( ($opt eq '-p') || ($opt eq '--preserve') )
{ my $tags= shift @ARGV;
my @tags= split /\s+/, $tags;
$opt{preserve_space_in} ||= [];
push @{$opt{preserve_space_in}}, @tags;
}
elsif( ($opt eq '-e') || ($opt eq '--encoding') )
{ die $USAGE if( $opt{encoding});
$opt{encoding}= shift @ARGV;
}
elsif( ($opt eq '-l') || ($opt eq '--load'))
{ die $USAGE if( $opt{load});
$opt{load}=1;
}
elsif( ($opt eq '-f') || ($opt eq '--files') )
{ my $file= shift @ARGV;
push @ARGV, files_from( $file);
}
elsif( ($opt eq '-h') || ($opt eq '--help'))
{ system "pod2text", $0; exit; }
elsif( $opt eq '--')
{ last; }
else
{ die $USAGE; }
}
$opt{style} ||= $DEFAULT_STYLE;
return %opt;
}
# get the list of files (one per line) from a file
sub files_from
{ my $file= shift;
open( FILES, "<$file") or die "cannot open file $file: $!";
my @files;
while( <FILES>) { chomp; push @files, $_; }
close FILES;
return @files;
}
sub backup
{ my( $file, $extension)= @_;
my $backup;
if( $extension=~ m{\*})
{ ($backup= $extension)=~ s{\*}{$file}g; }
else
{ $backup= $file.$extension; }
return $backup;
}
__END__
=head1 NAME
xml_pp - xml pretty-printer
=head1 SYNOPSYS
xml_pp [options] [<files>]
=head1 DESCRIPTION
XML pretty printer using XML::Twig
=head1 OPTIONS
=over 4
=item -i[<extension>]
edits the file(s) in place, if an extension is provided (no space between
C<-i> and the extension) then the original file is backed-up with that extension
The rules for the extension are the same as Perl's (see perldoc perlrun): if
the extension includes no "*" then it is appended to the original file name,
If the extension does contain one or more "*" characters, then each "*" is
replaced with the current filename.
=item -s <style>
the style to use for pretty printing: none, nsgmls, nice, indented, record, or
record_c (see XML::Twig docs for the exact description of those styles),
'indented' by default
=item -p <tag(s)>
preserves white spaces in tags. You can use several C<-p> options or quote the
tags if you need more than one
=item -e <encoding>
use XML::Twig output_encoding (based on Text::Iconv or Unicode::Map8 and
Unicode::String) to set the output encoding. By default the original encoding
is preserved.
If this option is used the XML declaration is updated (and created if there was
none).
Make sure that the encoding is supported by the parser you use if you want to
be able to process the pretty_printed file (XML::Parser does not support
'latin1' for example, you have to use 'iso-8859-1')
=item -l
loads the documents in memory instead of outputing them as they are being
parsed.
This prevents a bug (see L<BUGS|bugs>) but uses more memory
=item -f <file>
read the list of files to process from <file>, one per line
=item -v
verbose (list the current file being processed)
=item --
stop argument processing (to process files that start with -)
=item -h
display help
=back
=head1 EXAMPLES
xml_pp foo.xml > foo_pp.xml # pretty print foo.xml
xml_pp < foo.xml > foo_pp.xml # pretty print from standard input
xml_pp -v -i.bak *.xml # pretty print .xml files, with backups
xml_pp -v -i'orig_*' *.xml # backups are named orig_<filename>
xml_pp -i -p pre foo.xhtml # preserve spaces in pre tags
xml_pp -i.bak -p 'pre code' foo.xml # preserve spaces in pre and code tags
xml_pp -i.bak -p pre -p code foo.xml # same
xml_pp -i -s record mydb_export.xml # pretty print using the record style
xml_pp -e utf8 -i foo.xml # output will be in utf8
xml_pp -e iso-8859-1 -i foo.xml # output will be in iso-8859-1
xml_pp -v -i.bak -f lof # pretty print in place files from lof
xml_pp -- -i.xml # pretty print the -i.xml file
xml_pp -l foo.xml # loads the entire file in memory
# before pretty printing it
xml_pp -h # display help
=head1 BUGS
Elements with mixed content that start with an embedded element get an extra \n
<elt><b>b</b>toto<b>bold</b></elt>
will be output as
<elt>
<b>b</b>toto<b>bold</b></elt>
Using the C<-l> option solves this bug (but uses more memory)
=head1 TODO
update XML::Twig to use Encode with perl 5.8.0
=head1 AUTHOR
Michel Rodriguez <mirod@xmltwig.com>
|