/usr/include/CLucene/index/SegmentHeader.h is in libclucene-dev 0.9.21b-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#ifndef _lucene_index_SegmentHeader_
#define _lucene_index_SegmentHeader_
#if defined(_LUCENE_PRAGMA_ONCE)
# pragma once
#endif
#include "SegmentInfos.h"
#include "CLucene/util/BitSet.h"
#include "CLucene/util/VoidMap.h"
#include "Term.h"
#include "FieldInfos.h"
#include "FieldsReader.h"
#include "IndexReader.h"
#include "TermInfosReader.h"
#include "CompoundFile.h"
#include "CLucene/util/ThreadLocal.h"
CL_NS_DEF(index)
class SegmentReader;
/**
 * TermDocs implementation tied to a SegmentReader: the constructor requires
 * its parent reader to be a segment reader.
 *
 * NOTE(review): the base is inherited virtually, presumably so that
 * SegmentTermPositions can derive from both this class and TermPositions
 * while sharing a single TermDocs base -- confirm against TermPositions.
 */
class SegmentTermDocs:public virtual TermDocs {
// Private state (default class access).
// NOTE(review): the roles hinted at below are inferred from the member names
// only; confirm against the implementation file.
// presumably the current document number reported by doc()
int32_t _doc;
// skip-list bookkeeping (interval, number of skips, skips consumed so far)
int32_t skipInterval;
int32_t numSkips;
int32_t skipCount;
// separate input used for skip data -- TODO confirm how it is obtained
CL_NS(store)::IndexInput* skipStream;
int32_t skipDoc;
// 64-bit pointers; presumably file offsets into the freq/prox/skip data
int64_t freqPointer;
int64_t proxPointer;
int64_t skipPointer;
bool haveSkipped;
protected:
// SegmentReader parent
const SegmentReader* parent;
// input stream used for frequency data (cf. SegmentReader::freqStream)
CL_NS(store)::IndexInput* freqStream;
int32_t count;
// presumably the document frequency of the current term -- TODO confirm
int32_t df;
// presumably the value reported by freq()
int32_t _freq;
// set of deleted documents (cf. SegmentReader::deletedDocs)
CL_NS(util)::BitSet* deletedDocs;
public:
virtual ~SegmentTermDocs();
// seek() overloads: position by term enumerator, by term, or by TermInfo.
virtual void seek(TermEnum* termEnum);
virtual void seek(Term* term);
virtual void seek(const TermInfo* ti);
virtual void close();
// Accessors for the current document number and term frequency.
virtual int32_t doc()const;
virtual int32_t freq()const;
virtual bool next();
/** Optimized implementation. */
virtual int32_t read(int32_t* docs, int32_t* freqs, int32_t length);
/** Optimized implementation. */
virtual bool skipTo(const int32_t target);
virtual TermPositions* __asTermPositions();
///\param Parent must be a segment reader
SegmentTermDocs( const SegmentReader* Parent);
protected:
// Hooks with empty default bodies; SegmentTermPositions overrides both.
virtual void skippingDoc(){}
// The SegmentTermPositions override is documented as being called by skipTo().
virtual void skipProx(int64_t proxPointer){}
};
/**
 * Combines SegmentTermDocs with the TermPositions interface.
 * Adds a prox stream and position bookkeeping, and overrides the
 * skippingDoc()/skipProx() hooks declared by SegmentTermDocs.
 */
class SegmentTermPositions: public SegmentTermDocs, public TermPositions {
private:
// input stream used for position data (cf. SegmentReader::proxStream)
CL_NS(store)::IndexInput* proxStream;
// NOTE(review): presumably the number of positions left to read for the
// current document and the last position returned -- confirm in the .cpp
int32_t proxCount;
int32_t position;
public:
///\param Parent must be a segment reader
SegmentTermPositions(const SegmentReader* Parent);
~SegmentTermPositions();
// Overrides of the SegmentTermDocs members with the same signatures.
void seek(const TermInfo* ti);
void close();
int32_t nextPosition();
bool next();
int32_t read(int32_t* docs, int32_t* freqs, int32_t length);
virtual TermDocs* __asTermDocs();
virtual TermPositions* __asTermPositions();
//resolve SegmentTermDocs/TermPositions ambiguity
// Each forwarder below simply delegates to the SegmentTermDocs implementation.
void seek(Term* term){ SegmentTermDocs::seek(term); }
void seek(TermEnum* termEnum){ SegmentTermDocs::seek(termEnum); }
int32_t doc() const{ return SegmentTermDocs::doc(); }
int32_t freq() const{ return SegmentTermDocs::freq(); }
bool skipTo(const int32_t target){ return SegmentTermDocs::skipTo(target); }
protected:
// Override of the SegmentTermDocs hook of the same name.
void skippingDoc();
/** Called by super.skipTo(). */
void skipProx(int64_t proxPointer);
};
/**
 * An IndexReader responsible for reading 1 segment of an index
 */
class SegmentReader: public IndexReader{
    /**
     * The class Norm represents the normalizations for a field.
     * These normalizations are read from an IndexInput into an array of bytes called bytes
     */
    class Norm :LUCENE_BASE{
        int32_t number;
        SegmentReader* reader;
        const char* segment; ///< pointer to segment name
    public:
        CL_NS(store)::IndexInput* in;
        uint8_t* bytes;
        bool dirty;

        //Constructor
        Norm(CL_NS(store)::IndexInput* instrm, int32_t number, SegmentReader* reader, const char* segment);
        //Destructor
        ~Norm();

        // NOTE(review): presumably writes the norm bytes back out when dirty -- confirm in the .cpp
        void reWrite();
    };
    friend class SegmentReader::Norm;

    //Holds the name of the segment that is being read
    const char* segment;

    //Indicates if there are documents marked as deleted
    bool deletedDocsDirty;
    bool normsDirty;
    bool undeleteAll;

    //Holds all norms for all fields in the segment (keyed by field name)
    typedef CL_NS(util)::CLHashtable<const TCHAR*,Norm*,CL_NS(util)::Compare::TChar, CL_NS(util)::Equals::TChar> NormsType;
    NormsType _norms;

    // NOTE(review): `ones` is presumably the cached array handed out by fakeNorms()
    // (see norms(): "Returns fake norms if norms aren't available") -- confirm
    uint8_t* ones;
    uint8_t* fakeNorms();

    // Compound File Reader when based on a compound file segment
    CompoundFileReader* cfsReader;
    ///Reads the Field Info file
    FieldsReader* fieldsReader;

    // The initial TermVectorsReader; per-thread clones are kept in termVectorsLocal
    // (see getTermVectorsReader()).
    TermVectorsReader* termVectorsReaderOrig;
    CL_NS(util)::ThreadLocal<TermVectorsReader*,
        CL_NS(util)::Deletor::Object<TermVectorsReader> >termVectorsLocal;

    void initialize(SegmentInfo* si);

    /**
     * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
     * @return TermVectorsReader
     */
    TermVectorsReader* getTermVectorsReader();

protected:
    ///Marks document docNum as deleted
    void doDelete(const int32_t docNum);
    void doUndeleteAll();
    void doCommit();
    void doSetNorm(int32_t doc, const TCHAR* field, uint8_t value);

    // can return null if norms aren't stored
    uint8_t* getNorms(const TCHAR* field);

public:
    /**
    Func - Constructor.
           Opens all files of a segment
           .fnm     -> Field Info File
                       Field names are stored in the field info file, with suffix .fnm.
           .frq     -> Frequency File
                       The .frq file contains the lists of documents which contain
                       each term, along with the frequency of the term in that document.
           .prx     -> Prox File
                       The prox file contains the lists of positions that each term occurs
                       at within documents.
           .tis     -> Term Info File
                       This file is sorted by Term. Terms are ordered first lexicographically
                       by the term's field name, and within that lexicographically by the term's text.
           .del     -> Deletion File
                       The .del file is optional, and only exists when a segment contains deletions
           .f[0-9]* -> Norm File
                       Contains, for each document, a byte that encodes a value that is
                       multiplied into the score for hits on that field:
    */
    SegmentReader(SegmentInfo* si);

    SegmentReader(SegmentInfos* sis, SegmentInfo* si);
    ///Destructor.
    virtual ~SegmentReader();

    ///Closes all streams to the files of a single segment
    void doClose();

    ///Checks if a segment managed by SegmentInfo si has deletions
    static bool hasDeletions(const SegmentInfo* si);
    bool hasDeletions() const;
    bool hasNorms(const TCHAR* field) const;

    ///Returns all file names managed by this SegmentReader
    void files(CL_NS(util)::AStringArrayWithDeletor& retarray);
    ///Returns an enumeration of all the Terms and TermInfos in the set.
    TermEnum* terms() const;
    ///Returns an enumeration of terms starting at or after the named term t
    TermEnum* terms(const Term* t) const;

    ///Gets the document identified by n
    bool document(int32_t n, CL_NS(document)::Document* doc);

    ///Checks if the n-th document has been marked deleted
    bool isDeleted(const int32_t n);

    ///Returns an unpositioned TermDocs enumerator.
    TermDocs* termDocs() const;
    ///Returns an unpositioned TermPositions enumerator.
    TermPositions* termPositions() const;

    ///Returns the number of documents which contain the term t
    int32_t docFreq(const Term* t) const;

    ///Returns the actual number of documents in the segment
    int32_t numDocs();
    ///Returns the number of all the documents in the segment including the ones that have
    ///been marked deleted
    int32_t maxDoc() const;

    ///Returns the bytes array that holds the norms of a named field.
    ///Returns fake norms if norms aren't available
    uint8_t* norms(const TCHAR* field);

    ///Reads the Norms for field from disk
    void norms(const TCHAR* field, uint8_t* bytes);

    ///Creates a filename by concatenating segment with ext and x and returns it.
    ///NOTE(review): ownership of the returned buffer is not visible in this header -- confirm.
    char* SegmentName(const char* ext, const int32_t x=-1);
    ///Creates a filename in buffer by concatenating segment with ext and x
    void SegmentName(char* buffer,int32_t bufferLen,const char* ext, const int32_t x=-1 );

    /**
    * @see IndexReader#getFieldNames(IndexReader.FieldOption fldOption)
    */
    void getFieldNames(FieldOption fldOption, CL_NS(util)::StringArrayWithDeletor& retarray);

    static bool usesCompoundFile(SegmentInfo* si);

    /** Return a term frequency vector for the specified document and field. The
    *  vector returned contains term numbers and frequencies for all terms in
    *  the specified field of this document, if the field had storeTermVector
    *  flag set.  If the flag was not set, the method returns null.
    * @throws IOException
    */
    TermFreqVector* getTermFreqVector(int32_t docNumber, const TCHAR* field=NULL);

    /** Return an array of term frequency vectors for the specified document.
    *  The array contains a vector for each vectorized field in the document.
    *  Each vector contains term numbers and frequencies for all terms
    *  in a given vectorized field.
    *  If no such fields existed, the method returns null.
    * @throws IOException
    */
    bool getTermFreqVectors(int32_t docNumber, Array<TermFreqVector*>& result);

private:
    //Open all norms files for all fields
    void openNorms(CL_NS(store)::Directory* cfsDir);
    //Closes all norms files
    void closeNorms();

    ///a bitVector that manages which documents have been deleted
    CL_NS(util)::BitSet* deletedDocs;
    ///an IndexInput to the frequency file
    CL_NS(store)::IndexInput* freqStream;
    ///For reading the fieldInfos file
    FieldInfos* fieldInfos;
    ///For reading the Term Dictionary .tis file
    TermInfosReader* tis;
    ///an IndexInput to the prox file
    CL_NS(store)::IndexInput* proxStream;

    static bool hasSeparateNorms(SegmentInfo* si);
    static uint8_t* createFakeNorms(int32_t size);

    //allow various classes to access the internals of this. this allows us to have
    //a more tight idea of the package
    friend class IndexReader;
    friend class IndexWriter;
    friend class SegmentTermDocs;
    friend class SegmentTermPositions;
    friend class MultiReader;
};
CL_NS_END
#endif
|