# /usr/lib/perl5/KinoSearch1/Index/TermInfosReader.pm is in libkinosearch1-perl 1.00-1build3.
# This file is owned by root:root, with mode 0o644.
# The actual contents of the file can be viewed below.
package KinoSearch1::Index::TermInfosReader;
use strict;
use warnings;
use KinoSearch1::Util::ToolSet;
use base qw( KinoSearch1::Util::Class );
# Declare this class's instance variables (all defaulting to undef) at
# compile time by handing them to the inherited init_instance_vars().
BEGIN {
    # accepted by the constructor and kept as members
    my @constructor_params = (
        invindex => undef,
        seg_name => undef,
        finfos   => undef,
    );

    # members only; both are assigned in init_instance()
    my @internal_members = (
        orig_enum  => undef,
        index_enum => undef,
    );

    __PACKAGE__->init_instance_vars( @constructor_params, @internal_members );
}
use KinoSearch1::Index::SegTermEnum;
# Build the two SegTermEnum objects this reader relies on: the main Enum over
# "<seg_name>.tis" and the index Enum over "<seg_name>.tii".  Returns whatever
# the index Enum's fill_cache() returns.
sub init_instance {
    my $self = shift;
    my ( $invindex, $finfos ) = @{$self}{qw( invindex finfos )};
    my $enum_class = 'KinoSearch1::Index::SegTermEnum';

    # main Enum: gives access to every term
    $self->{orig_enum} = $enum_class->new(
        finfos   => $finfos,
        instream => $invindex->open_instream("$self->{seg_name}.tis"),
    );

    # index Enum: flagged is_index, used to find a starting point in the
    # main Enum
    my $index_enum = $enum_class->new(
        finfos   => $finfos,
        instream => $invindex->open_instream("$self->{seg_name}.tii"),
        is_index => 1,
    );
    $self->{index_enum} = $index_enum;

    # load the index Enum into memory
    return $index_enum->fill_cache;
}
# Return a clone of the main SegTermEnum.  With no Term the main Enum is reset
# first; with a Term, fetch_term_info() is run first so that the main Enum is
# positioned for that Term before it is cloned.
#
# NOTE(review): fetch_term_info() returns before seeking when the Term's
# termstring is undefined, so in that case the clone carries whatever position
# the main Enum already had -- confirm callers expect this.
sub terms {
    my ( $self, $term ) = @_;

    unless ( defined $term ) {
        my $main_enum = $self->{orig_enum};
        $main_enum->reset;
        return $main_enum->clone_enum;
    }

    # called only for its effect on the main Enum's position
    $self->fetch_term_info($term);
    return $self->{orig_enum}->clone_enum;
}
# Look up a Term in this segment.  Returns the TermInfo produced by
# _scan_enum() when the Term is present; returns nothing (undef in scalar
# context) when it is absent or when the Term yields no termstring.
sub fetch_term_info {
    my ( $self, $term ) = @_;
    my $wanted = $term->get_termstring( $self->{finfos} );

    # an undefined termstring means the field doesn't exist, so there is
    # nothing to seek to
    if ( defined $wanted ) {
        $self->_seek_enum($wanted);
        return $self->_scan_enum($wanted);
    }

    return;
}
# Use the index Enum to put the main Enum as near as possible to the place
# where the termstring would be found.  Returns whatever the main Enum's
# seek() returns.
sub _seek_enum {
    my ( $self, $termstring ) = @_;
    my $main_enum = $self->{orig_enum};
    my $locator   = $self->{index_enum};

    # ask the index Enum for the approximate location, then read back the
    # termstring and TermInfo it settled on
    my $index_slot     = $locator->scan_cache($termstring);
    my $nearby_string  = $locator->get_termstring;
    my $nearby_terminf = $locator->get_term_info;

    # point the main Enum just before the term: the second argument is the
    # index position scaled by the index interval, minus one
    return $main_enum->seek(
        $nearby_terminf->get_index_fileptr,
        $index_slot * $main_enum->get_index_interval - 1,
        $nearby_string,
        $nearby_terminf,
    );
}
# Step the main Enum forward to the target termstring and report whether it
# landed exactly on it: returns the Enum's TermInfo on an exact match,
# nothing otherwise.
sub _scan_enum {
    my ( $self, $target_termstring ) = @_;
    my $main_enum = $self->{orig_enum};

    # iterate until the Enum's termstring is ge the target
    $main_enum->scan_to($target_termstring);

    # anything other than an exact match means the term isn't here
    my $landed_on = $main_enum->get_termstring;
    return unless defined $landed_on;
    return unless $landed_on eq $target_termstring;

    return $main_enum->get_term_info;
}
# Pass through the skip interval reported by the main Enum.
sub get_skip_interval {
    my $self = shift;
    return $self->{orig_enum}->get_skip_interval;
}
# Close both underlying Enums, main first and index second.  Returns whatever
# the index Enum's close() returns.
sub close {
    my ($self) = @_;
    my ( $main_enum, $index_enum ) = @{$self}{qw( orig_enum index_enum )};

    $main_enum->close;
    return $index_enum->close;
}
1;
__END__
=begin devdocs

=head1 NAME

KinoSearch1::Index::TermInfosReader - look up Terms in an invindex

=head1 DESCRIPTION

A TermInfosReader manages the relationship between two SegTermEnum objects - a
primary and an index.

It would be possible, though extremely inefficient, to scan through a single
SegTermEnum every time you wanted to know about a Term. Having an index makes
the process much quicker, and you need a TermInfosReader to deal with the
index.

=head1 COPYRIGHT

Copyright 2005-2010 Marvin Humphrey

=head1 LICENSE, DISCLAIMER, BUGS, etc.

See L<KinoSearch1> version 1.00.

=end devdocs

=cut
|