This file is indexed.

/usr/share/perl5/XMLTV/Get_nice.pm is in libxmltv-perl 0.5.70-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# $Id: Get_nice.pm,v 1.37 2017/09/12 00:38:58 knowledgejunkie Exp $
#
# Library to wrap LWP::UserAgent to put in a random delay between
# requests and set the User-Agent string.  We really should be using
# LWP::RobotUI but this is better than nothing.
#
# If you're sure your app doesn't need a random delay (because it is
# fetching from a site designed for that purpose) then set
# $XMLTV::Get_nice::Delay to zero, or a value in seconds.  This is the
# maximum delay - on average the sleep will be half that.
#
#This random delay will be between 0 and 5 ($Delay) seconds. This means
# some sites will complain you're grabbing too fast (since 20% of your
# grabs will be less than 1 second apart). To introduce a minimum delay
# set $XMLTV::Get_nice::MinDelay to a value in seconds.
# This will be added to $Delay to derive the actual delay used.
# E.g. Delay = 5 and MinDelay = 3, then the actual delay will be
# between 3 and 8 seconds,
#
# get_nice() is the function to call, however
# XMLTV::Get_nice::get_nice_aux() is the one to cache with
# XMLTV::Memoize or whatever.  If you want an HTML::Tree object use
# get_nice_tree().
# Alternatively, get_nice_json() will get you a JSON object,
# or get_nice_xml() will get a XML::Parser 'Tree' object
#

use strict;

package XMLTV::Get_nice;

# use version number for feature detection:
# 0.005065 : new methods get_nice_json(), get_nice_xml()
# 0.005065 : add decode option to get_nice_tree()
# 0.005065 : expose the LWP response object ($Response)
# 0.005066 : support unknown tags in HTML::TreeBuilder ($IncludeUnknownTags)
# 0.005067 : new method post_nice_json()
# 0.005070 : skip get_nice sleep for cached pages
# 0.005070 : support passing HTML::TreeBuilder options via a hashref
our $VERSION = 0.005070;

use base 'Exporter';
our @EXPORT = qw(get_nice get_nice_tree get_nice_xml get_nice_json post_nice_json error_msg);
use Encode qw(decode);
use LWP::UserAgent;
use XMLTV;
our $Delay = 5; # in seconds
our $MinDelay = 0; # in seconds
our $FailOnError = 1; # Fail on fetch error
our $Response; # LWP response object
our $IncludeUnknownTags = 0; # add support for HTML5 tags which are unknown to older versions of TreeBuilder (and therfore ignored by it)



our $ua = LWP::UserAgent->new;
$ua->agent("xmltv/$XMLTV::VERSION");
$ua->env_proxy;
our %errors = ();


sub error_msg($) {
    my ($url) = @_;
    $errors{$url};
}
sub get_nice( $ ) {
    # This is to ensure scalar context, to work around weirdnesses
    # with Memoize (I just can't figure out how SCALAR_CACHE and
    # LIST_CACHE relate to each other, with or without MERGE).
    #
    return scalar get_nice_aux($_[0]);
}

# Fetch page and return as HTML::Tree object.
# Optional arguments:
#   i) a function to put the page data through (eg, to clean up bad characters)
#      before parsing.
#  ii) convert incoming page to UNICODE using this codepage (use "UTF-8" for
#      strict utf-8)
# iii) a hashref containing options to configure the HTML::TreeBuilder object 
#      before parsing
#
sub get_nice_tree( $;$$$ ) {
    my ($uri, $filter, $codepage, $htb_opts) = @_;
    require HTML::TreeBuilder;
    my $content = get_nice $uri;
    $content = $filter->($content) if $filter;
    if ($codepage) {
        $content = decode($codepage, $content);
    }
    else {
        $content = decode('UTF-8', $content);
    }

    my $t = HTML::TreeBuilder->new();
    $t->ignore_unknown(!$IncludeUnknownTags);

    if (ref $htb_opts eq 'HASH') {
        $t->$_($htb_opts->{$_}) foreach (keys %$htb_opts);
    }

    $t->parse($content) or die "cannot parse content of $uri\n";
    $t->eof;
    return $t;
}

# Fetch page and return as XML::Parser 'Tree' object.
# Optional arguments:
# i) a function to put the page data through (eg, to clean up bad
# characters) before parsing.
# ii) convert incoming page to UNICODE using this codepage (use "UTF-8" for  strict utf-8)
#
sub get_nice_xml( $;$$ ) {
    my ($uri, $filter, $codepage) = @_;
    require XML::Parser;
    my $content = get_nice $uri;
    $content = $filter->($content) if $filter;
    if ($codepage) {
      $content = decode($codepage, $content);
    }
    else {
      $content = decode('UTF-8', $content);
    }
    my $t = XML::Parser->new(Style => 'Tree')->parse($content) or die "cannot parse content of $uri\n";
    return $t;
}

# Fetch page and return as JSON::PP object.
# Optional arguments:
# i) a function to put the page data through (eg, to clean up bad
# characters) before parsing.
# ii) convert incoming UTF-8 to UNICODE
#
sub get_nice_json( $;$$ ) {
    my ($uri, $filter, $utf8) = @_;
    require JSON::PP;
    my $content = get_nice $uri;
    $content = $filter->($content) if $filter;
    $utf8 = defined $utf8 ? 1 : 0;
    my $t = JSON::PP->new()->utf8($utf8)->decode($content) or die "cannot parse content of $uri\n";
    return $t;
}

my $last_get_time;
my $last_get_from_cache;
sub get_nice_aux( $ ) {
    my $url = shift;

    if (defined $last_get_time && (defined $last_get_from_cache && !$last_get_from_cache) ) {
        # A page has already been retrieved recently.  See if we need
        # to sleep for a while before getting the next page - being
        # nice to the server.
        #
        my $next_get_time = $last_get_time + (rand $Delay) + $MinDelay;
        my $sleep_time = $next_get_time - time();
        sleep $sleep_time if $sleep_time > 0;
    }

    my $r = $ua->get($url);

    # Then start the delay from this time on the next fetch - so we
    # make the gap _between_ requests rather than from the start of
    # one request to the start of the next.  This punishes modem users
    # whose individual requests take longer, but it also punishes
    # downloads that take a long time for other reasons (large file,
    # slow server) so it's about right.
    #
    $last_get_time = time();

    # expose the response object for those grabbers which need to process the headers, status code, etc.
    $Response = $r;

    # set flag if last fetch was from cache 
    $last_get_from_cache = (defined $r->{'_headers'}{'x-cached'} && $r->{'_headers'}{'x-cached'} == 1);

    if ($r->is_error) {
        # At the moment download failures seem rare, so the script dies if
        # any page cannot be fetched.  We could later change this routine
        # to return undef on failure.  But dying here makes sure that a
        # failed page fetch doesn't get stored in XMLTV::Memoize's cache.
        #
        die "could not fetch $url, error: " . $r->status_line . ", aborting\n" if $FailOnError;
        $errors{$url} = $r->status_line;
        return undef;
    } else {
        return $r->content;
    }

}

# Fetch page via a JSON object in the Content and return as a JSON object.  
# Arguments:
#    URI to post to
#    JSON object with the AJAX data to be posted e.g. "{ 'programId':'123456', 'channel':'BBC'}"
#
sub post_nice_json( $$ ) {
    my $url = shift;
    my $json = shift;

    require JSON::PP;

    if (defined $last_get_time) {
        # A page has already been retrieved recently.  See if we need
        # to sleep for a while before getting the next page 
        #
        my $next_get_time = $last_get_time + (rand $Delay) + $MinDelay;
        my $sleep_time = $next_get_time - time();
        sleep $sleep_time if $sleep_time > 0;
    }

    my $r = $ua->post($url, 'Content_Type' => 'application/json; charset=utf-8', 'Content' => $json);

    $last_get_time = time();

    # expose the response object for those grabbers which need to process the headers, status code, etc.
    $Response = $r;

    if ($r->is_error) {
        die "could not fetch $url, error: " . $r->status_line . ", aborting\n" if $FailOnError;
        $errors{$url} = $r->status_line;
        return undef;
    } else {
        my $content = JSON::PP->new()->utf8(1)->decode($r->content) or die "cannot parse content of $url\n";
        return $content;
    }
}

1;