# /usr/share/perl5/Parse/MediaWikiDump/page.pm is in libparse-mediawikidump-perl 1.0.6-1.
# This file is owned by root:root, with mode 0o644.
# The actual contents of the file can be viewed below.
package Parse::MediaWikiDump::page;
our $VERSION = '1.0.4';
use strict;
use warnings;
use List::Util;
# Constructor.
#
# Arguments (after the class name):
#   $data            - hash reference holding the page record; the accessors
#                      below read keys such as title, id and text from it
#   $category_anchor - namespace label used by categories() to find links
#   $case_setting    - accepted for interface compatibility, but not stored
#   $namespaces      - array reference of [number, name] pairs, consulted
#                      by namespace()
#
# Returns the new blessed object with an empty per-object cache.
sub new {
    my $class = shift;
    my ($data, $category_anchor, undef, $namespaces) = @_;

    return bless {
        DATA            => $data,
        CACHE           => {},
        CATEGORY_ANCHOR => $category_anchor,
        NAMESPACES      => $namespaces,
    }, $class;
}
# Returns the namespace part of the page title, or '' when the title has no
# "prefix:" part or the prefix is not one of the known namespace names.
# The result is memoised in the per-object cache.
sub namespace {
    my $self  = shift;
    my $title = $self->title;

    return $self->{CACHE}{namespace} if defined $self->{CACHE}{namespace};

    my $found = '';

    if ($title =~ m/^([^:]+):(.*)/) {
        # Copy the capture so the comparison below does not depend on $1.
        my $prefix = $1;

        # Each entry of NAMESPACES is a [number, name] pair; only the name
        # is compared, and the first exact match wins.
        for my $entry (@{ $self->{NAMESPACES} }) {
            my (undef, $label) = @$entry;
            next unless $prefix eq $label;
            $found = $prefix;
            last;
        }
    }

    $self->{CACHE}{namespace} = $found;
    return $found;
}
# Returns a reference to an array of the category names linked from the page
# text, or undef when the text contains no category links.
#
# A category link is "[[<anchor>:<name>]]", where <anchor> is the
# CATEGORY_ANCHOR given to the constructor (matched case-insensitively).
# Anything from the first "|" onwards (the sort key) is dropped from <name>.
#
# The result, including the "no categories" case, is memoised in the
# per-object cache so the text is scanned at most once.
sub categories {
    my ($self) = @_;

    # exists() rather than defined(), so that a cached undef ("no
    # categories") is honoured instead of rescanning the text on every call.
    return $$self{CACHE}{categories} if exists $$self{CACHE}{categories};

    my $anchor = $$self{CATEGORY_ANCHOR};
    my $text   = $$self{DATA}{text};
    my @cats;

    # \Q...\E: the anchor is data, not a pattern; metacharacters in it must
    # match literally.
    while ($text =~ m/\[\[\Q$anchor\E:\s*([^\]]+)\]\]/gi) {
        my $buf = $1;

        # Deal with the pipe trick. /s and \z make sure the whole sort key
        # is removed even when it contains a newline.
        $buf =~ s/\|.*\z//s;

        push(@cats, $buf);
    }

    # Callers are documented to receive undef (not an empty list) when there
    # are no categories, so the explicit undef is kept.
    my $result = @cats ? \@cats : undef;
    $$self{CACHE}{categories} = $result;
    return $result;
}
# Returns the link target of a "#redirect [[...]]" directive found at the
# very start of the page text (case-insensitive, optional colon after the
# keyword), or undef when the text does not start with one.
# The answer, including undef, is memoised in the per-object cache.
sub redirect {
    my $self = shift;

    return $self->{CACHE}{redirect} if exists $self->{CACHE}{redirect};

    # List-context match: $target receives the capture, or undef on failure.
    my ($target) = $self->{DATA}{text} =~ m/^#redirect\s*:?\s*\[\[([^\]]*)\]\]/i;

    $self->{CACHE}{redirect} = $target;
    return $target;
}
# Accessor: the "title" field of the page record.
sub title {
    my $self = shift;
    return $self->{DATA}{title};
}
# Accessor: the "id" field of the page record.
sub id {
    my $self = shift;
    return $self->{DATA}{id};
}
# Accessor: the "revision_id" field of the page record.
sub revision_id {
    my $self = shift;
    return $self->{DATA}{revision_id};
}
# Accessor: the "timestamp" field of the page record, returned as stored.
sub timestamp {
    my $self = shift;
    return $self->{DATA}{timestamp};
}
# Accessor: the "username" field of the page record (undef when not set).
sub username {
    my $self = shift;
    return $self->{DATA}{username};
}
# Accessor: the "userid" field of the page record (undef when not set).
sub userid {
    my $self = shift;
    return $self->{DATA}{userid};
}
# Accessor: the "userip" field of the page record (undef when not set).
sub userip {
    my $self = shift;
    return $self->{DATA}{userip};
}
# Accessor: the "minor" field of the page record, returned exactly as stored.
sub minor {
    my $self = shift;
    return $self->{DATA}{minor};
}
# Returns a reference to the "text" field of the page record, not a copy:
# writing through the reference changes the stored text.
sub text {
    my $self = shift;
    return \$self->{DATA}{text};
}
1;
__END__
=head1 NAME
Parse::MediaWikiDump::page - Object representing a specific revision of a MediaWiki page
=head1 ABOUT
This object is returned from the "next" method of Parse::MediaWikiDump::Pages
and Parse::MediaWikiDump::Revisions. You most likely will not be creating instances
of this particular object yourself; instead, you use this object to access the information
about a page in a MediaWiki instance.
=head1 SYNOPSIS
$pages = Parse::MediaWikiDump::Pages->new('pages-articles.xml');
#get all the records from the dump files, one record at a time
while(defined($page = $pages->next)) {
print "title '", $page->title, "' id ", $page->id, "\n";
}
=head1 STATUS
This software is being RETIRED - MediaWiki::DumpFile is the official successor to
Parse::MediaWikiDump and includes a compatibility library called MediaWiki::DumpFile::Compat
that is 100% API compatible and is a near-perfect stand-in for this module. It is faster
in all instances where it counts and is actively maintained. Any undocumented deviation
of MediaWiki::DumpFile::Compat from Parse::MediaWikiDump is considered a bug and will
be fixed.
=head1 METHODS
=over 4
=item $page->redirect
Returns a string containing the target of the
redirect (the text inside the [[...]] link) if the
page is a redirect, or undef if it is not.
=item $page->categories
Returns a reference to an array that
contains a list of categories or undef
if there are no categories. This method
does not understand templates and may
not return all the categories the article actually belongs in.
=item $page->title
Returns a string of the full article title including the namespace if present
=item $page->namespace
Returns a string of the namespace of the article or an empty string if the article is in the default namespace
=item $page->id
Returns a number that is the id for the page in the MediaWiki instance
=item $page->revision_id
Returns a number that is the revision id for the page in the MediaWiki instance
=item $page->timestamp
Returns a string in the following format: 2005-07-09T18:41:10Z
=item $page->username
Returns a string of the username responsible for this specific revision of the article or undef if the editor was anonymous
=item $page->userid
Returns a number that is the id for the user returned by $page->username or undef if the editor was anonymous
=item $page->userip
Returns a string of the IP of the editor if the edit was anonymous or undef otherwise
=item $page->minor
Returns 1 if this article was flagged as a minor edit otherwise returns 0
=item $page->text
Returns a reference to a string that contains the article text
=back
=cut