This file is indexed.

/usr/share/perl5/URI/Title/HTML.pm is in liburi-title-perl 1.86-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
=head1 NAME

URI::Title::HTML - get titles of html files

=cut

package URI::Title::HTML;

use warnings;
use strict;
use HTML::Entities;
use utf8;

# Flag recording whether Encode could be loaded at compile time and its
# decode() function imported into the current package.
our $CAN_USE_ENCODE;
BEGIN {
  # Run the load inside eval so that a failure to load Encode is trapped
  # rather than aborting compilation of this file.
  eval {
    require Encode;
    Encode->import('decode');
  };
  my $load_error = $@;

  # True when the eval finished without leaving an error behind.
  $CAN_USE_ENCODE = !$load_error;
}

# Returns the two type identifiers 'text/html' and 'default'
# (presumably the content types this module is registered for --
# confirm against the caller that consumes types()).
sub types {
  return ( 'text/html', 'default' );
}

# Extract a human-readable title from an HTML document.
#
# Called as a class method:
#   $class - invocant (not used in the body)
#   $url   - URL the document was fetched from; selects site-specific
#            extraction patterns
#   $data  - document body to search
#   $type  - content type (not used in the body)
#   $cset  - charset hint; replaced by the first charset=... declaration
#            found in $data, if any
#
# Returns the cleaned-up title string, or nothing (bare return) when $data
# is undefined/empty or no title pattern matches. For phobos.apple.com pages
# carrying an itms:// link the result of URI::Title::iTMS->title is returned
# instead, provided that module has already been loaded.
sub title {
  my ($class, $url, $data, $type, $cset) = @_;

  # Nothing to search: bail out quietly instead of matching against undef.
  return unless defined $data and length $data;
  $url = '' unless defined $url;

  my $title = '';
  my $special_case;

  my $default_match = '<title.*?>(.+?)</title';

  # special case for the iTMS.
  if ( $INC{'URI/Title/iTMS.pm'} and $url =~ m!phobos\.apple\.com! and $data =~ m!(itms://[^']*)! ) {
    return URI::Title::iTMS->title($1);
  }

  # TODO - work this out from the headers of the HTML
  # Accept unquoted, double-quoted and single-quoted charset values.
  if ($data =~ /charset=["']?([\w-]+)/i) {
    $cset = lc($1);
  }

  if ( $CAN_USE_ENCODE ) {
    # Try strict UTF-8 first, then the declared charset (only when there is
    # one); if neither decodes cleanly keep the data untouched.
    $data = eval { decode('utf-8', $data, 1) }
         || ( defined $cset && eval { decode($cset, $data, 1) } )
         || $data;
  }

  # Site-specific patterns. Each branch may set a regex to try before the
  # generic <title> match and/or a prefix for the returned title.
  if ($url =~ /use\.perl\.org\/~([^\/]+).*journal\/\d/i) {
    $title = "use.perl journal of $1 - ";
    $special_case = '<FONT FACE="geneva,verdana,sans-serif" SIZE="1"><B>(.+?)<';

  } elsif ($url =~ /(?:pants\.heddley\.com|dailychump\.org).*#(.*)$/i) {
    # The fragment comes straight from the URL, so escape it before it is
    # compiled into a pattern; otherwise a fragment such as "(" would make
    # the match below die with an invalid-regex error.
    my $id = quotemeta($1);
    $special_case = 'id="a'.$id.'.*?></a>(.+?)<';
    $title = "pants daily chump - ";

  } elsif ($url =~ /paste\.husk\.org/i) {
    $special_case = 'Summary: (.+?)<';
    $title = "paste - ";

  } elsif ($url =~ m{twitter\.com/.*?/status(?:es)?/\d+}i) {
    $special_case = '<p class="js-tweet-text tweet-text ">([^\<]+)';
    $title = "twitter - ";

  } elsif ($url =~ /independent\.co\.uk/i) {
    $special_case = '<h1 class=head1>(.+?)<';

  } elsif ($url =~ /www\.hs\.fi\/english\/article/i) {
    $special_case = '<h1>(.+?)</h1>';

  } elsif ($url =~ /google\.com/i and $data =~ /calc_img/) {
    # google can be used as a calculator. Try to find the result.
    $special_case = 'calc_img.*<td nowrap>(.+?)</td';

  } elsif ($url =~ /spotify\.url\.fi/) {
    $special_case = '<title>\s*(.+?)\s+&mdash;\s+Decode\s+Spotify\s+URIs\s*</title>';

  }

  # Use defined-ness rather than truth so that a title of "0" is kept.
  my $found_title;
  if (defined $special_case) {
    ($found_title) = $data =~ /$special_case/ims;
  }
  if (!defined $found_title) {
    ($found_title) = $data =~ /$default_match/ims;
  }
  return unless defined $found_title;

  $found_title =~ s/<sup>(.+?)<\/sup>/^$1/g; # for the google math output
  $found_title =~ s/<.*?>//g;                # drop any remaining markup
  $title .= $found_title;

  # Trim both ends and fold every whitespace run, newlines included, into a
  # single space. Newlines are not deleted outright, since that would glue
  # together words that were separated only by a line break.
  $title =~ s/^\s+//;
  $title =~ s/\s+$//;
  $title =~ s/\s+/ /g;

  $title = decode_entities($title);

  # decode nasty number-encoded entities. Mostly works
  # (this also catches numeric entities that only appear after the first
  # decoding pass, e.g. a source containing "&amp;#8217;").
  $title =~ s/(&\#(\d+);?)/chr($2)/eg;

  return $title;
}

1;