// /usr/include/dclib-0.3/dclib/csearchindex.h

/***************************************************************************
                           csearchindex.h  -  description
                             -------------------
    begin                : Mon May 14 2003
    copyright            : (C) 2003-2004 by Mathias Küster
    email                : mathen@users.berlios.de
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#ifndef CSEARCHINDEX_H
#define CSEARCHINDEX_H

/**
  *@author Mathias Küster
  *
  * This handles the data about files in the share, and data on all
  * files ever hashed (which can be cleaned up with /rebuild).
  *
  * Before 0.3.19, there was some kind of "keyword index", which was
  * an extremely unhelpful design, firstly because components of file names
  * only 1 or 2 characters long were not stored. This meant that 1 or 2
  * character components of searches were completely ignored: most
  * likely somebody wanted a specific episode of something, but dclib
  * returned any 10 episodes. This was partially fixed by filtering
  * the results using the otherwise ignored components.
  *
  * However, the original search indexes also did not include components of
  * the path with the name and, worse, did not do substring matching.
  * For example, searching for "chick" would not find files with "chicks" in the name.
  *
  * So, the original system would either not return any results, or return a
  * load of results that didn't match.
  *
  * The new system is just a long list of the case folded path+name for each
  * file in the share. Searching it was moved to CFileManager, mainly because
  * a CKeywordIndex class briefly existed, which was mostly the old system
  * but with readable code.
  *
  * The share / hash database is implemented using CByteArray and memcmp(),
  * so is highly prone to error.
  *
  * The size of struct filebaseobject and hashbaseobject depends on the CPU
  * word size and possibly also the compiler. The data is probably also
  * dependent on CPU endianness.
  *
  * Unfortunately the length of the hash leaf data is stored as a ulonglong
  * at the start of the data pointed to by m_nHashLeavesIndex, it should
  * have been added to struct hashbaseobject instead.
  *
  * The hashes are not stored in any particular order, this would require
  * a higher level class.
  */

#include <dclib/dcos.h>

/* eFileTypes */
#include <dclib/core/types.h>

/* CString parameters everywhere */
#include <dclib/core/cstring.h>

#include <set>

/* time_t */
#include <time.h>

/*
 * Record layouts used by the share / hash database byte arrays.
 *
 * Both structs are packed to 1-byte alignment. The packing state is saved
 * with push and restored with pop, so that including this header never
 * alters the packing that was in effect in the including file (a bare
 * "#pragma pack()" would reset it to the compiler default instead).
 *
 * Even when packed, sizeof() of these structs is not portable: the widths
 * of unsigned long and time_t differ between platforms. Do not change the
 * order or types of the members, existing data depends on them.
 */
#pragma pack(push, 1)

/**
 * One record per file in the share.
 * NOTE(review): the meaning of each member below is inferred from its name
 * and from the other comments in this header - confirm against the
 * implementation.
 */
typedef struct filebaseobject {
	/** File type code, presumably an eFileTypes value stored as unsigned int */
	unsigned int m_eFileType;
	/** Size of the file, presumably in bytes */
	ulonglong m_nSize;
	/** Index locating the path of the file, presumably in the path base array */
	unsigned long m_nPathIndex;
	/** Index locating the name of the file, presumably in the file base array */
	unsigned long m_nFileIndex;
	/**
	 * Index locating the hash of the file.
	 * The RebuildLists() documentation states that invalid indexes are
	 * set to (unsigned long)-1.
	 */
	unsigned long m_nHashIndex;
	/** Modification time, presumably of the file when it was indexed */
	time_t m_tModTime;
} filebaseobject;

/**
 * One record per hashed file in the hash database.
 * NOTE(review): the meaning of each member below is inferred from its name
 * and from the other comments in this header - confirm against the
 * implementation.
 */
typedef struct hashbaseobject {
	/** Size of the hashed file, presumably in bytes */
	ulonglong m_nSize;
	/** Index locating the path of the file, presumably in the hash path base array */
	unsigned long m_nPathIndex;
	/** Index locating the name of the file, presumably in the hash file base array */
	unsigned long m_nFileIndex;
	/** Index locating the hash, presumably in the hash index array */
	unsigned long m_nHashIndex;
	/**
	 * Index locating the hash leaf data.
	 * The length of the leaf data is stored as a ulonglong at the start
	 * of the data this index points to, it is not a member of this struct.
	 */
	unsigned long m_nHashLeavesIndex;
	/** Modification time, presumably of the file when it was hashed */
	time_t m_tModTime;
} hashbaseobject;

#pragma pack(pop)

class CByteArray;
class CFileInfo;
class CFile;

/**
 * Stores the data about files in the share and about hashed files.
 *
 * The class holds two groups of CByteArray pointers: those commented
 * "read only" and their m_pUpdating* counterparts commented "writeable".
 * Many methods exist in two flavours, Foo() and FooDuringUpdate().
 * NOTE(review): presumably the DuringUpdate flavour works on the
 * m_pUpdating* arrays between PrepareUpdate() and FinishUpdate() and the
 * plain flavour works on the read only arrays - confirm against the
 * implementation.
 */
class CSearchIndex {

public:
	/** Constructor. According to the LoadedOK() documentation it calls LoadIndex(). */
	CSearchIndex();
	/** Virtual destructor. */
	virtual ~CSearchIndex();

	/**
	 * Loads the index data.
	 * Returns a bool; the value returned for the call made by the
	 * constructor is what LoadedOK() reports (presumably true == success).
	 */
	bool LoadIndex();
	/**
	 * Saves the index data.
	 * NOTE(review): which arrays / files are written is not visible in this header.
	 */
	void SaveIndex();
	/**
	 * Starts an update of the index.
	 * NOTE(review): presumably sets up the writeable m_pUpdating* arrays - confirm.
	 */
	void PrepareUpdate();
	/**
	 * Finishes an update started with PrepareUpdate().
	 * NOTE(review): presumably the updated arrays replace the read only ones - confirm.
	 */
	void FinishUpdate();
	/**
	 * Resets the hash index.
	 * NOTE(review): exactly which arrays are affected is not visible in this header.
	 */
	void ResetHashIndex();

	/**
	 * Searches for the given hash and returns a pointer to a set of indexes.
	 * NOTE(review): presumably file base indexes of matching files; who owns
	 * the returned set and whether it can be null must be checked in the
	 * implementation. The expected length of hash is not visible here.
	 */
	std::set<unsigned long> * SearchHash( unsigned char * hash );

	/**
	 * Gets the filebaseobject at index into *fbo.
	 * NOTE(review): presumably returns false if index is out of range - confirm.
	 */
	bool GetFileBaseObject( unsigned long index, struct filebaseobject * fbo );
	/** Same parameters as GetFileBaseObject( index, fbo ), DuringUpdate flavour. */
	bool GetFileBaseObjectDuringUpdate( unsigned long index, struct filebaseobject * fbo );
	/**
	 * Gets the filebaseobject at index into *fbo and a file name into filename.
	 * NOTE(review): whether filename includes the path is not visible here.
	 */
	bool GetFileBaseObject( unsigned long index, struct filebaseobject * fbo, CString & filename );
	/** Same parameters as GetFileBaseObject( index, fbo, filename ), DuringUpdate flavour. */
	bool GetFileBaseObjectDuringUpdate( unsigned long index, struct filebaseobject * fbo, CString & filename );
	/**
	 * Gets the filebaseobject at index into *fbo and the case folded name into name.
	 * NOTE(review): presumably name is the case folded path+name string
	 * kept in m_pCaseFoldedData - confirm.
	 */
	bool GetCaseFoldedName( unsigned long index, struct filebaseobject * fbo, CString & name );
	
	/** Returns the file name for index i (presumably a file base index). */
	CString GetFileName( unsigned long i );
	/** Same parameters as GetFileName(), DuringUpdate flavour. */
	CString GetFileNameDuringUpdate( unsigned long i );
	/**
	 * Adds a file to the index from the given file info, path and file type.
	 * Returns an unsigned long, presumably the index of the new entry - TODO confirm.
	 */
	unsigned long AddIndex( CFileInfo *fileinfo, CString path, eFileTypes filetype );
	/** Must be added in m_pBaseArray order */
	void AddSearchIndex( const CString & name );
	/**
	 * Updates the entry at index from *fbo.
	 * NOTE(review): presumably overwrites the stored filebaseobject - confirm.
	 */
	void UpdateIndex( unsigned long index, struct filebaseobject * fbo );
	/** Returns the number of index entries (presumably filebaseobjects). */
	unsigned long IndexCount();
	/** Same as IndexCount(), DuringUpdate flavour. */
	unsigned long IndexCountDuringUpdate();
	
	/**
	 * Looks up the given hash and stores a hash index in *hi.
	 * NOTE(review): presumably returns true only if the hash was found - confirm.
	 */
	bool FindHash( unsigned char * hash, unsigned long * hi );
	/** Same parameters as FindHash(), DuringUpdate flavour. */
	bool FindHashDuringUpdate( unsigned char * hash, unsigned long * hi );
	/**
	 * Returns the hash for the hash base index hbi as a CString.
	 * NOTE(review): the text encoding of the returned hash is not visible here.
	 */
	CString GetHash( unsigned long hbi );
	/**
	 * Converts the hash index hi to a hash base index stored in *hbi.
	 * NOTE(review): presumably returns false if no hashbaseobject refers to hi - confirm.
	 */
	bool HashBaseIndexFromHashIndex( unsigned long hi, unsigned long * hbi );
	/** Same parameters as HashBaseIndexFromHashIndex(), DuringUpdate flavour. */
	bool HashBaseIndexFromHashIndexDuringUpdate( unsigned long hi, unsigned long * hbi );
	/**
	 * Converts the hash base index hbi to a (file) base index stored in *bi.
	 * NOTE(review): presumably returns false if no matching file is shared - confirm.
	 */
	bool BaseIndexFromHashBaseIndex( unsigned long hbi, unsigned long * bi );
	/**
	 * Adds hash data for the file with the given file base index.
	 * hash is the hash, leaves is the hash leaf data and lsize is
	 * presumably the length of leaves in bytes - TODO confirm.
	 */
	void AddHashIndex( unsigned long filebaseindex, unsigned char * hash, unsigned char * leaves, unsigned long lsize );
	/**
	 * Finds the hash base index for the file described by *fbo and stores it in *hbi.
	 * NOTE(review): presumably returns true only if a matching
	 * hashbaseobject exists - confirm.
	 */
	bool FindHashBaseIndex( struct filebaseobject * fbo, unsigned long * hbi );
	/** Same parameters as FindHashBaseIndex(), DuringUpdate flavour. */
	bool FindHashBaseIndexDuringUpdate( struct filebaseobject * fbo, unsigned long * hbi );
	/**
	 * Compares a filebaseobject with a hashbaseobject.
	 * NOTE(review): presumably true means both describe the same file;
	 * which members are compared is not visible in this header.
	 */
	bool Compare( struct filebaseobject * fbo, struct hashbaseobject * hbo );
	/** Same parameters as Compare(), DuringUpdate flavour. */
	bool CompareDuringUpdate( struct filebaseobject * fbo, struct hashbaseobject * hbo );
	/** Get the TTH leaves for the given TTH */
	CByteArray * GetHashLeaves( CString tth );
	/** Appends newly created hash leaf data to hashleaves.bin */
	bool SaveNewHashLeaves();
	/**
	 * Rebuild the byte arrays, removing data for files not currently shared.
	 *
	 * Returns the number of errors detected (an m_n?????Index was out of range).
	 * Invalid indexes are set to (unsigned long)-1. If errors is non-zero
	 * a filelist refresh is needed.
	 */
	long RebuildLists();
	/**
	 * Validates all hash leaf data.
	 * If the leaf data does not match the root, the hashbaseobject is
	 * removed from the database. This causes the file to be re-hashed
	 * in the filelist refresh which follows. The data itself is not
	 * removed because that would make various indexes invalid.
	 *
	 * Returns the number of invalid entries removed. If that is > 0
	 * then the full filelist refresh routine must be started to
	 * re-hash the files.
	 */
	long ValidateHashLeaves();
	/** Print out some stats */
	void PrintDatabaseStats();
	/** Returns what LoadIndex() in constructor returned. */
	bool LoadedOK() const { return m_bLoadedOK; } ;

private:
	/**
	 * Reads the hash leaves from the file, returning true on success.
	 * The file must be not null and open, dest must not be null.
	 * hli is presumably the hash leaves index (m_nHashLeavesIndex) - TODO confirm.
	 */
	bool ReadLeaves( CFile * file, unsigned long hli, CByteArray * dest );
	
	/** The value returned by LoadedOK() */
	bool m_bLoadedOK;

	/** read only - presumably the file name data */
	CByteArray * m_pFileBaseArray;
	/** read only - presumably the path data */
	CByteArray * m_pPathBaseArray;
	/** read only - presumably the filebaseobject records */
	CByteArray * m_pBaseArray;
	
	/** read only - presumably the hashbaseobject records */
	CByteArray * m_pHashBaseArray;
	/** read only - presumably the file name data of hashed files */
	CByteArray * m_pHashFileBaseArray;
	/** read only - presumably the path data of hashed files */
	CByteArray * m_pHashPathBaseArray;
	/** read only - presumably the hash data */
	CByteArray * m_pHashIndex;
	/**
	 * Newly created hash leaf data to append to hashleaves.bin
	 * There is no need to load the whole file into memory.
	 */
	CByteArray * m_pNewHashLeaves;
	/** The size of hashleaves.bin */
	ulonglong hashleavessize;
	
	/** writeable - updating counterpart of m_pFileBaseArray */
	CByteArray * m_pUpdatingFileBaseArray;
	/** writeable - updating counterpart of m_pPathBaseArray */
	CByteArray * m_pUpdatingPathBaseArray;
	/** writeable - updating counterpart of m_pBaseArray */
	CByteArray * m_pUpdatingBaseArray;

	/** writeable - updating counterpart of m_pHashBaseArray */
	CByteArray * m_pUpdatingHashBaseArray;
	/** writeable - updating counterpart of m_pHashFileBaseArray */
	CByteArray * m_pUpdatingHashFileBaseArray;
	/** writeable - updating counterpart of m_pHashPathBaseArray */
	CByteArray * m_pUpdatingHashPathBaseArray;
	/** writeable - updating counterpart of m_pHashIndex */
	CByteArray * m_pUpdatingHashIndex;
	
	/** New store of case folded path+name indexes */
	CByteArray * m_pCaseFoldedBase;
	/** New store of case folded path+name strings */
	CByteArray * m_pCaseFoldedData;
	
	/** Updating counterpart of m_pCaseFoldedBase (presumably writeable) */
	CByteArray * m_pUpdatingCaseFoldedBase;
	/** Updating counterpart of m_pCaseFoldedData (presumably writeable) */
	CByteArray * m_pUpdatingCaseFoldedData;
};

#endif
// libdc-dev 0.3.24~svn3121-2.1 / usr / include / dclib-0.3 / dclib / csearchindex.h