This file is indexed.

/usr/bin/hxprintlinks is in html-xml-utils 6.5-1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#! /usr/bin/perl
#
# Script to collect all <A> links in an HTML file, number them, and
# print a list of them at the end of the file.
#
# Part of HTML-XML-utils, see:
# http://www.w3.org/Tools/HTML-XML-utils/
#
# Author: Bert Bos <bert@w3.org>
# Created: 1 Feb 2001
# Version: $Id: hxprintlinks,v 1.2 2009/01/08 14:36:13 bbos Exp $

use Getopt::Std;
use strict;

# States of the scanner.  Each one records how much of an
# <a ... href = "url" ...> start tag has been recognized so far.
sub START {			# Not inside an <a> start tag
  return 0;
}
sub SEEN_A {			# After "<a"
  return 1;
}
sub SEEN_HREF {			# After "<a" and "href"
  return 2;
}
sub SEEN_EQ {			# After "<a", "href" and "="
  return 3;
}
sub SEEN_URL {			# After "<a", "href", "=" and a URL
  return 4;
}

# urlexpand -- expands a relative URL to an absolute one
#
# Arguments:
#   $url   the URL reference to resolve (absolute or relative)
#   $base  the base URL it is relative to
# Returns the resolved URL as a string.
#
# Resolution follows the outline of RFC 3986, section 5.2:
#   - a reference with a scheme is returned unchanged;
#   - an empty or "#fragment" reference replaces only the base's fragment;
#   - a "?query" reference replaces the base's query and fragment;
#   - a "//host/..." reference takes only the scheme from the base;
#   - a "/path" reference takes scheme and host from the base;
#   - anything else replaces the last path segment of the base.
# "." and ".." segments are removed from the path of the result, but
# never from its query or fragment and never across the host name.
sub urlexpand($$) {
  my ($url, $base) = @_;

  return $url if $url =~ /^\w+:/;	# Already absolute: keep as is

  # Empty or fragment-only: same document, only the fragment changes
  if ($url eq '' || $url =~ /^\#/) {
    (my $doc = $base) =~ s/\#.*//s;
    return $doc.$url;
  }

  # All other references discard the query and fragment of the base
  (my $stem = $base) =~ s/[?\#].*//s;
  return $stem.$url if $url =~ /^\?/;	# Query only: keep base's path

  # Split the rest of the base into scheme, authority and path.  The
  # pattern always matches; groups that are absent are set to ''.
  my ($scheme, $authority, $path) = $stem =~ m{^(\w+:)?(//[^/]*)?(.*)\z}s;
  $scheme = '' unless defined $scheme;
  $authority = '' unless defined $authority;

  return $scheme.$url if $url =~ m{^//}; # Other host, same scheme

  # Keep query/fragment of the reference away from the dot removal
  my ($rel, $tail) = $url =~ /^([^?\#]*)(.*)\z/s;

  my $merged;
  if ($rel =~ m{^/}) {			# Absolute path replaces base's path
    $merged = $rel;
  } else {				# Relative path replaces last segment
    $path =~ s{[^/]*\z}{};
    $path = '/' if $path eq '' && $authority ne '';
    $merged = $path.$rel;
  }

  # Remove "." segments, then "segment/.." pairs.  The loops rescan
  # after each removal so that "a/b/../.." collapses completely.
  1 while $merged =~ s{(^|/)\./}{$1};
  $merged =~ s{(^|/)\.\z}{$1};
  1 while $merged =~ s{(^|/)(?!\.\./)[^/]+/\.\.(?:/|\z)}{$1};
  # ".." cannot climb above the root of an absolute path: drop it
  1 while $merged =~ s{^/\.\.(?:/|\z)}{/};

  return $scheme.$authority.$merged.$tail;
}

# print_links -- print OL list of URLs
#
# Arguments:
#   $base  base URL, or undef to print the URLs exactly as collected
#   $n     number of URLs to print
#   @urls  the URLs; entries 1 to $n are printed, entry 0 is not used
# Writes an <ol> element with one <li> per URL to standard output.
sub print_links($$@) {
  my ($base, $n, @urls) = @_;
  print "\n<ol>\n";
  foreach my $i (1 .. $n) {
    my $item = $urls[$i];
    $item = urlexpand($item, $base) if defined $base;
    print "<li>" . $item . "</li>\n";
  }
  print "</ol>\n";
}

# Program name without leading directories, for the usage message
(my $PROG = $0) =~ s{.*/}{}s;
my $USAGE = "Usage: $PROG [-b base] [file]\n";

# Command line: the only option is -b, which takes the base URL
my %options;			# Command line options
getopts('b:', \%options) or die $USAGE;
my $base = $options{b};		# Base URL to make URLs absolute, or undef

# Scanner state shared by the main loop
my $state = START;		# State machine
my $url;			# Most recent URL
my @urls;			# All URLs seen so far, in $urls[1..$n]
my $n = 0;			# Number of URLs stored in @urls

# Main loop: copy the input to standard output.  After each <a> start
# tag that has a quoted href attribute, insert "[n]" and remember the
# URL as number n.  Just before the first </body> or </html> (or at end
# of input if neither occurs) print the list of remembered URLs.
#
# The tag is recognized by a small state machine whose state survives
# across input lines, so a start tag may be spread over several lines.
# Inside a tag (every state except START) the patterns are anchored at
# the start of the remaining text, so that they can never skip over the
# ">" that ends the tag and pick up "href", "=" or a quoted string from
# whatever follows it.
while (<>) {			# Loop over lines
  # Test the length rather than /./: "." does not match a newline, so
  # a newline directly after a tag would otherwise be lost.
  while (length $_) {		# Loop over tokens in a line
    if ($state == START) {
      if (/<a\b\s*/i) {
        print $`, $&;
        $_ = $';
        $state = SEEN_A;
      } elsif (/<\/body|<\/html/i) {
        print $`;
        print_links($base, $n, @urls) if ($n);
        $n = 0;			# Avoid printing list twice
        print $&;
        $_ = $';
      } else {
        print;
        $_ = '';
      }
    } elsif ($state == SEEN_A) {
      if (/^[^>]*\bhref\b\s*/i) {	# Found href inside this tag
        print $&;
        $_ = $';
        $state = SEEN_HREF;
      } elsif (/^[^>]*>/) {		# Tag ends without href
        print $&;
        $_ = $';
        $state = START;
      } else {				# Tag continues on next line
        print;
        $_ = '';
      }
    } elsif ($state == SEEN_HREF) {
      if (/^\s*=\s*/) {
        print $&;
        $_ = $';
        $state = SEEN_EQ;
      } elsif (/^[^>]*>/) {		# href without a value
        print $&;
        $_ = $';
        $state = START;
      } else {
        print;
        $_ = '';
      }
    } elsif ($state == SEEN_EQ) {
      if (/^\s*\"([^\"]*)\"\s*/) {	# Double-quoted URL
        $url = $1;
        print $&;
        $_ = $';
        $state = SEEN_URL;
      } elsif (/^\s*\'([^\']*)\'\s*/) {	# Single-quoted URL
        $url = $1;
        print $&;
        $_ = $';
        $state = SEEN_URL;
      } elsif (/^[^>]*>/) {		# No quoted value: not numbered
        print $&;
        $_ = $';
        $state = START;
      } else {
        print;
        $_ = '';
      }
    } elsif ($state == SEEN_URL) {
      if (/^[^>]*>/) {			# End of start tag: add the number
        print $&;
        print "[", ++$n, "]";
        $urls[$n] = $url;
        $_ = $';
        $state = START;
      } else {
        print;
        $_ = '';
      }
    } else {
      die "Cannot happen";
    }
  }
}

print_links($base, $n, @urls) if ($n); # Seen no </body> or </html>