/usr/bin/html2wiki is in libhtml-wikiconverter-perl 0.68-3.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 | #!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; # not running under some shell
package main;
use warnings;
use strict;
use HTML::WikiConverter;
# Script entry point: collect converter attributes from the command line,
# convert the HTML read via <>, and print the resulting wiki markup.
my %o = H::WC::GetOpts->get_opts();

# Direct class-method call rather than indirect object syntax
# ("new Class (...)"), whose parsing is heuristic and which perlobj
# recommends against.
my $wc = HTML::WikiConverter->new( %o );

# Undefining $/ locally puts <> into slurp mode, so the input is read as a
# single string instead of line by line.
my $html = do { local $/; <> };

# Emit the converted markup followed by a trailing newline.
print $wc->html2wiki($html), "\n";
#
# Attribute/option handling
#
package H::WC::GetOpts;
use Params::Validate ':types';
use Getopt::Long;
use Pod::Usage;
# Parse @ARGV into a hash of converter attributes.
#
# Invoked as a class method; the invocant is not used. Returns a flat
# key/value list of the options that were actually set, after
# remove_ignored_opts() has dropped script-only and undefined entries and
# turned dashes in option names back into underscores. Shows usage and
# exits when option parsing fails or when no dialect is available from
# either --dialect or the WCDIALECT environment variable.
sub get_opts {
    my %attrs = known_attributes();

    # Seed every known option with undef; entries still undefined after
    # parsing are removed by remove_ignored_opts().
    my %o;
    @o{ keys %attrs } = ();

    # Handlers for the informational options; each one terminates the script.
    my $list_dialects = sub {
        warn "Installed dialects:\n";
        print map "$_\n", HTML::WikiConverter->available_dialects;
        exit(1);
    };
    my $show_help = sub {
        pod2usage( -exit => 1, -verbose => 0 );
    };
    my $list_options = sub {
        warn "Accepted options:\n";
        print map "--$_\n", sort keys %attrs;
        exit(1);
    };

    # The leading hash reference is where Getopt::Long stores the values of
    # options that have no explicit handler of their own.
    my @optspec = (
        \%o,
        'list'    => $list_dialects,
        'help'    => $show_help,
        'options' => $list_options,
        attrs2optspecs( \%attrs ),
    );

    eval {
        pod2usage(2) unless GetOptions(@optspec);
    };
    die "problem parsing command-line options: $@" if $@;

    # Fall back to the environment when --dialect was not given.
    $o{dialect} = $ENV{WCDIALECT} unless $o{dialect};
    pod2usage(2) unless $o{dialect};

    remove_ignored_opts( \%o );
    return %o;
}
# Collect the attribute specifications of HTML::WikiConverter and of every
# installed dialect that can be loaded.
#
# Returns a flat list of (option-name => spec) pairs. Names listed in
# IGNORED_ATTRS() are removed, and underscores in the remaining names are
# replaced with dashes so they can serve as command-line option names.
sub known_attributes {
    my %attributes = %{ HTML::WikiConverter->__default_attribute_specs };

    for my $dialect ( HTML::WikiConverter->available_dialects ) {
        my $module = "HTML::WikiConverter::$dialect";

        # Dialects that fail to load are skipped silently.
        next unless eval "use $module; 1";

        # Dialect attributes override earlier ones of the same name.
        my %dialect_attrs = %{ $module->attributes };
        @attributes{ keys %dialect_attrs } = values %dialect_attrs;
    }

    delete @attributes{ IGNORED_ATTRS() };

    # Normalize attr name with dashes. keys() returns a snapshot, so the
    # hash may be modified while looping over it.
    for my $name ( keys %attributes ) {
        next unless $name =~ /_/;
        ( my $dashed = $name ) =~ tr/_/-/;
        $attributes{$dashed} = delete $attributes{$name};
    }

    return %attributes;
}
# Translate attribute specifications into Getopt::Long option specs.
#
# Takes a hashref mapping option names to spec hashrefs (keys read here:
# type, optional, default; type is compared against the Params::Validate
# type constants and treated as BOOLEAN when missing or false). Returns
# the sorted list of option-spec strings. Attributes of type CODEREF and
# attributes with a false name are skipped; names present in
# FORCED_OPTSPECS() use the spec given there verbatim.
sub attrs2optspecs {
    my $attrs = shift;
    my @optspecs;
    my %forced_optspecs = FORCED_OPTSPECS();

    # Loop over keys() instead of each(): each() resumes from wherever the
    # caller's hash iterator was last left, so a partially consumed
    # iterator could make options silently disappear. The result is sorted,
    # so the iteration order itself does not matter.
    foreach my $attr ( keys %$attrs ) {
        my $spec = $attrs->{$attr};
        my $type = $spec->{type} ? $spec->{type} : BOOLEAN;
        next if $type == CODEREF;
        next unless $attr;

        if ( my $forced = $forced_optspecs{$attr} ) {
            push @optspecs, $forced;
            next;
        }

        # "=" makes the option's value mandatory, ":" makes it optional.
        my $reqopt_sym = exists $spec->{optional} && !$spec->{optional} ? '=' : ':';

        my $type_sym = '';
        $type_sym = 's'  if $type == SCALAR;
        $type_sym = 's@' if $type == ARRAYREF or $type == ( SCALAR | ARRAYREF );

        # Booleans that default to '1' get a negatable ("--no-...") form.
        my $neg_sym = '';
        $neg_sym = '!' if $type == BOOLEAN and $spec->{default} and $spec->{default} eq '1';

        # No required/optional symbol without a type to follow it
        $reqopt_sym = '' unless $type_sym;

        push @optspecs, join '', $attr, $reqopt_sym, $type_sym, $neg_sym;
    }

    return sort @optspecs;
}
# Clean up the parsed option hash in place.
#
# Takes a hashref of options. Entries named in IGNORED_ATTRS() and entries
# whose value is undefined are deleted; every remaining key containing
# dashes is renamed to its underscore form.
sub remove_ignored_opts {
    my $o = shift;

    my %is_ignored;
    $is_ignored{$_} = 1 for IGNORED_ATTRS();

    # keys() yields a snapshot, so deleting and adding entries inside the
    # loop is safe.
    for my $name ( keys %$o ) {
        if ( $is_ignored{$name} or !defined $o->{$name} ) {
            delete $o->{$name};
            next;
        }

        next unless $name =~ /-/;
        ( my $attr_name = $name ) =~ tr/-/_/;
        $o->{$attr_name} = delete $o->{$name};
    }
}
# Names that are stripped from both the known-attribute table and the
# parsed option hash.
sub IGNORED_ATTRS {
    return qw( list help options slurp );
}
# For forward compatibility until 'type' is specified for all dialect
# attributes: fixed Getopt::Long specs, keyed by option name, that
# attrs2optspecs() uses instead of deriving a spec from the attribute.
# ":s" is an optional string value; ":s@" additionally collects repeated
# occurrences into a list.
sub FORCED_OPTSPECS {
    return (
        'base-uri'             => 'base-uri:s',
        'header-style'         => 'header-style:s',
        'image-style'          => 'image-style:s',
        'link-style'           => 'link-style:s',
        'ordered-list-style'   => 'ordered-list-style:s',
        'strip-tags'           => 'strip-tags:s@',
        'unordered-list-style' => 'unordered-list-style:s',
        'wiki-uri'             => 'wiki-uri:s@',
    );
}
__END__
=head1 NAME
html2wiki - convert HTML into wiki markup
=head1 SYNOPSIS
html2wiki [options] [file]
Commonly used options:
--dialect=dialect Dialect name, e.g. "MediaWiki" (required unless
the WCDIALECT environment variable is used)
--encoding=encoding Source encoding (default is 'utf-8')
--base-uri=uri Base URI for relative links
--wiki-uri=uri URI fragment for wiki links
--wrap-in-html Wrap input in <html> and </html> (enabled by default).
Use --no-wrap-in-html to disable.
--escape-entities Escape HTML entities within text elements (enabled by
default). Use --no-escape-entities to disable.
--list List installed dialects and exit
--options List all recognized options (except for negations
such as --no-wrap-in-html)
--help Show this message and exit
Additional options, including those corresponding to dialect
attributes, are also supported. Consult the html2wiki man page for
details.
Example:
html2wiki --dialect MediaWiki --encoding iso-8859-1 \
--base-uri http://en.wikipedia.org/wiki/ \
--wiki-uri http://en.wikipedia.org/wiki/ \
input.html > output.wiki
=head1 DESCRIPTION
C<html2wiki> is a command-line interface to L<HTML::WikiConverter>,
which it uses to convert HTML to wiki markup.
=head1 DIALECTS
If the dialect you provide in C<--dialect> is not installed on your
system (e.g. if you specify C<MediaWiki> but have not installed its
dialect module, L<HTML::WikiConverter::MediaWiki>) a fatal error will
be issued. Use C<html2wiki --list> to list all available dialects on
your system. Additional dialects may be downloaded from the CPAN.
=head1 OPTIONS
=head2 Correspondence of options and attributes
Each of the options accepted by C<html2wiki> corresponds to an
HTML::WikiConverter attribute. Commonly used options described in
C<html2wiki --help> therefore correspond to attributes discussed in
L<HTML::WikiConverter/ATTRIBUTES>. That section also contains other
attributes that may be used as C<html2wiki> command-line options.
=head2 Mapping an attribute name to an option name
While related, option names are not identical to their corresponding
attribute names. The only difference is that attribute names use
underscores to separate words while option names use hyphens. For
example, the C<base_uri> attribute corresponds to the C<--base-uri>
command-line option.
=head2 Additional options defined in dialect modules
Individual dialects may define their own attributes, and therefore
make available their own command-line options to C<html2wiki>, in
addition to the ones defined by C<HTML::WikiConverter>. The same rules
described above apply for converting between these attribute names and
their corresponding command-line option names. For example, Markdown
supports an C<unordered_list_style> attribute that takes a string
value. To use this attribute on the command line, one would use the
C<--unordered-list-style> option. Consult individual dialect man pages
for a list of supported attributes.
=head2 Options that are enabled by default
Attributes that take boolean values may be enabled by default. The
C<wrap_in_html> attribute is one such example. Because of this,
C<html2wiki> will effectively behave by default as if
C<--wrap-in-html> had been specified in every invocation. If this is
not desired, the option name may be prefixed with C<no-> to disable
the option, as in C<--no-wrap-in-html>.
=head2 Options that take multiple values
Some attributes (e.g., C<wiki_uri> and C<strip_tags>) accept an array of
values. To accommodate this in C<html2wiki>, such options can be
specified more than once on the command line. For example, to specify
that only comment and script elements should be stripped from HTML:
% html2wiki --strip-tags ~comment --strip-tags script ...
=head1 INPUT/OUTPUT
Input is taken from STDIN, so you may pipe the output from another
program into C<html2wiki>. For example:
curl http://example.com/input.html | html2wiki --dialect MediaWiki
You may also specify a file to read HTML from:
html2wiki --dialect MediaWiki input.html
Output is sent to STDOUT, though you may redirect it on the command
line:
html2wiki --dialect MediaWiki input.html > output.wiki
Or you may pipe it into another program:
html2wiki --dialect MediaWiki input.html | less
=head1 AUTHOR
David J. Iberri, C<< <diberri@cpan.org> >>
=head1 COPYRIGHT & LICENSE
Copyright 2006 David J. Iberri, all rights reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=head1 SEE ALSO
L<HTML::WikiConverter>
=cut
|