/usr/include/CLucene/index/MultiReader.h

/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#ifndef _lucene_index_MultiReader
#define _lucene_index_MultiReader

#if defined(_LUCENE_PRAGMA_ONCE)
# pragma once
#endif

#include "SegmentHeader.h"
#include "IndexReader.h"
#include "CLucene/document/Document.h"
#include "Terms.h"
#include "SegmentMergeQueue.h"

CL_NS_DEF(index)

/** An IndexReader which reads multiple indexes, appending their content.
*/
class MultiTermDocs:public virtual TermDocs {
private:
protected:
	TermDocs** readerTermDocs;

	IndexReader** subReaders;
  int32_t subReadersLength;
	const int32_t* starts;
	Term* term;

	int32_t base;
	int32_t pointer;

	TermDocs* current;              // == segTermDocs[pointer]
public:
	MultiTermDocs();
	MultiTermDocs(IndexReader** subReaders, const int32_t* s);
	virtual ~MultiTermDocs();

	int32_t doc() const;
	int32_t freq() const;

    void seek(TermEnum* termEnum);
	void seek(Term* tterm);
	bool next();

	/** Optimized implementation. */
	int32_t read(int32_t* docs, int32_t* freqs, int32_t length);

	/** As yet unoptimized implementation. */
	bool skipTo(const int32_t target);

	void close();

	virtual TermPositions* __asTermPositions();
protected:
	virtual TermDocs* termDocs(const IndexReader* reader) const;
private:
	TermDocs* termDocs(const int32_t i) const;

};


//MultiTermEnum represents the enumeration of all terms of all readers
class MultiTermEnum:public TermEnum {
private:
	SegmentMergeQueue* queue;

	Term* _term;
	int32_t _docFreq;
public:
	//Constructor
	//Opens all enumerations of all readers
	MultiTermEnum(IndexReader** subReaders, const int32_t* starts, const Term* t);

	//Destructor
	~MultiTermEnum();

	//Move the current term to the next in the set of enumerations
	bool next();

	//Returns a pointer to the current term of the set of enumerations
	Term* term();
	Term* term(bool pointer);

	//Returns the document frequency of the current term in the set
	int32_t docFreq() const;

	//Closes the set of enumerations in the queue
	void close();

		
	const char* getObjectName(){ return MultiTermEnum::getClassName(); }
	static const char* getClassName(){ return "MultiTermEnum"; }
};


class MultiTermPositions:public MultiTermDocs,public TermPositions {
public:
	MultiTermPositions(IndexReader** subReaders, const int32_t* s);
	~MultiTermPositions() {};
	int32_t nextPosition();


	virtual TermDocs* __asTermDocs();
	virtual TermPositions* __asTermPositions();
protected:
	TermDocs* termDocs(const IndexReader* reader) const;
};


class MultiReader:public IndexReader{
private:
	bool _hasDeletions;
	IndexReader** subReaders;
	int32_t subReadersLength;
	int32_t* starts;			  // 1st docno for each segment

	CL_NS(util)::CLHashtable<const TCHAR*,uint8_t*,
		CL_NS(util)::Compare::TChar,
			CL_NS(util)::Equals::TChar,
		CL_NS(util)::Deletor::tcArray,
		CL_NS(util)::Deletor::Array<uint8_t> > normsCache;
	int32_t _maxDoc;
	int32_t _numDocs;
	void initialize(IndexReader** subReaders);
  
	int32_t readerIndex(const int32_t n) const;
	
	bool hasNorms(const TCHAR* field);
	uint8_t* ones;
	uint8_t* fakeNorms();
protected:
	void doSetNorm(int32_t n, const TCHAR* field, uint8_t value);
	void doUndeleteAll();
	void doCommit();
	// synchronized
	void doClose();
	
	// synchronized
	void doDelete(const int32_t n);
public:
	/** Construct reading the named set of readers. */
	MultiReader(CL_NS(store)::Directory* directory, SegmentInfos* sis, IndexReader** subReaders);

	/**
	* <p>Construct a MultiReader aggregating the named set of (sub)readers.
	* Directory locking for delete, undeleteAll, and setNorm operations is
	* left to the subreaders. </p>
	* <p>Note that all subreaders are closed if this Multireader is closed.</p>
	* @param subReaders set of (sub)readers
	* @throws IOException
	*/
	MultiReader(IndexReader** subReaders);

	~MultiReader();

	/** Return an array of term frequency vectors for the specified document.
	*  The array contains a vector for each vectorized field in the document.
	*  Each vector vector contains term numbers and frequencies for all terms
	*  in a given vectorized field.
	*  If no such fields existed, the method returns null.
	*/
	bool getTermFreqVectors(int32_t n, Array<TermFreqVector*>& result);
	TermFreqVector* getTermFreqVector(int32_t n, const TCHAR* field);


	// synchronized
	int32_t numDocs();

	int32_t maxDoc() const;

	bool document(int32_t n, CL_NS(document)::Document* doc);

	bool isDeleted(const int32_t n);
	bool hasDeletions() const{ return _hasDeletions; }

	// synchronized
	uint8_t* norms(const TCHAR* field);
	void norms(const TCHAR* field, uint8_t* result);

	TermEnum* terms() const;
	TermEnum* terms(const Term* term) const;
	
	//Returns the document frequency of the current term in the set
	int32_t docFreq(const Term* t=NULL) const;
	TermDocs* termDocs() const;
	TermPositions* termPositions() const;
	
		
	/**
	* @see IndexReader#getFieldNames(IndexReader.FieldOption fldOption)
	*/
	void getFieldNames(FieldOption fldOption, CL_NS(util)::StringArrayWithDeletor& retarray);
};


CL_NS_END
#endif
libclucene-dev 0.9.21b-2 / usr / include / CLucene / index / MultiReader.h