This file is indexed.

/usr/bin/getpdftext is in libcam-pdf-perl 1.60-1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/perl -w

eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0; # not running under some shell

package main;

use warnings;
use strict;
use CAM::PDF;
use Getopt::Long;
use Pod::Usage;

our $VERSION = '1.60';

# Command-line switches; every flag starts out disabled.
my %opts = map { $_ => 0 } qw(check geom verbose help version);

# Let single-letter switches be bundled together (e.g. -cv).
Getopt::Long::Configure('bundling');
my $parsed_ok = GetOptions(
   'g|geometry' => \$opts{geom},
   'c|check'    => \$opts{check},
   'v|verbose'  => \$opts{verbose},
   'h|help'     => \$opts{help},
   'V|version'  => \$opts{version},
);

# Unknown or malformed switches: show usage and exit with status 1.
pod2usage(1) if !$parsed_ok;

# --help: show the full manual and exit successfully.
pod2usage(-exitstatus => 0, -verbose => 2) if $opts{help};

# --version: report the version of the CAM::PDF library, not of this script.
if ($opts{version})
{
   print "CAM::PDF v$CAM::PDF::VERSION\n";
   exit 0;
}

# At least one positional argument (the input PDF) is required.
pod2usage(1) if !@ARGV;

# Positional arguments: the PDF to read and an optional page list
# (e.g. "4-6,2,12,8-9").  $pagelist is undef when no list was given.
my $file = shift;
my $pagelist = shift;

# The trailing newline on the die message suppresses Perl's "at ... line N".
my $doc = CAM::PDF->new($file) || die "$CAM::PDF::errstr\n";

# rangeToArray is handed the bounds 1..numPages plus the user's page list
# and yields the individual page numbers to visit.
foreach my $p ($doc->rangeToArray(1,$doc->numPages(),$pagelist))
{
   if ($opts{check})
   {
      # Debug mode: validate the page's content tree instead of printing text.
      print "Checking page $p\n";
      my $tree = $doc->getPageContentTree($p, $opts{verbose});
      if (!$tree || !$tree->validate())
      {
         print "  Failed\n";
      }
      # Only compute geometry when a tree was actually built; calling a
      # method on a missing tree would abort the run instead of letting
      # the remaining pages be checked.
      if ($tree && $opts{geom})
      {
         $tree->computeGS();
      }
   }
   else
   {
      # Normal mode: extract the page text and print it to STDOUT.
      my $str = $doc->getPageText($p, $opts{verbose});
      if (defined $str)
      {
         CAM::PDF->asciify(\$str);
         print $str;
      }
   }
}


__END__

=for stopwords getpdftext.pl

=head1 NAME

getpdftext.pl - Extracts and prints the text from one or more PDF pages

=head1 SYNOPSIS

 getpdftext.pl [options] infile.pdf [<pagenums>]

 Options:
   -c --check          just validates the page instead of printing it
   -g --geometry       with --check, also computes page geometry
   -v --verbose        print diagnostic messages
   -h --help           verbose help message
   -V --version        print CAM::PDF version

 <pagenums> is a comma-separated list of page numbers.
      Ranges like '2-6' allowed in the list
      Example: 4-6,2,12,8-9

=head1 DESCRIPTION

Extracts all of the text from the specified PDF page(s) and prints
it to STDOUT.  If no pages are specified, all pages are processed.

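The C<--check> and C<--geometry> options are used primarily for debugging
and behave quite differently from normal text extraction.  With C<--check>,
each page's content tree is validated instead of its text being printed.
C<--geometry> only takes effect together with C<--check>, in which case the
geometry computation is also run on each page's content tree.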

=head1 SEE ALSO

CAM::PDF

F<renderpdf.pl>

=head1 AUTHOR

See L<CAM::PDF>

=cut