// /usr/include/ui-utilcpp/Recoder.hpp

/**
 * @file
 */
#ifndef UI_UTIL_RECODER_HPP
#define UI_UTIL_RECODER_HPP

// STDC++
#include <string>
#include <map>

// C libraries
#ifdef WIN32
#define PARAMS(Args) Args
#else
#include <recodext.h>
#endif

// C++ libraries
#include <ui-utilcpp/Exception.hpp>
#include <ui-utilcpp/Misc.hpp>

namespace UI {
namespace Util {

/** @brief Holder class for conversion results.
 *
 * Base of the conversion holders; see the derived classes for
 * variants that take over the storage they are constructed with.
 */
class Conversion
{
private:
	char const * const cStr_;
	size_t const cSize_;

public:
	/** @brief Constructor.
	 *
	 * @param cStr  Start of the conversion data.
	 * @param cSize Size (in bytes) of the conversion data.
	 */
	Conversion(char const * const cStr, size_t const cSize);
	virtual ~Conversion();

	/** @brief Get conversion data.
	 *
	 * @note This is a C-String, i.e., it always ends in a
	 * zero-byte. However, there also may be zero-bytes in between
	 * (depending on the encoding); you would also need getSize() in
	 * that case.
	 */
	char const * get() const;

	/** @brief Get size (in bytes) of conversion data. */
	size_t getSize() const;

	/** @brief Get std::string copy of conversion data.
	 *
	 * @note This copies the whole conversion (not only the C-String)
	 * into the std::string (@see get()).
	 */
	std::string getString() const;
};


/** @brief Conversion w/ std::free call on the supplied C-String.
 *
 * @note Not copyable: the supplied buffer gets std::free'd
 * (presumably by the destructor), so a copy holding the same
 * pointer would lead to it being freed twice.
 */
class StdFreeConversion: public Conversion
{
private:
	void * freeObject_;

	/* Copying is disabled: declared private and intentionally left
	 * undefined, so accidental copies fail at compile or link time. */
	StdFreeConversion(StdFreeConversion const &);
	StdFreeConversion & operator=(StdFreeConversion const &);
public:
	/** @brief Constructor. cStr will be std::free'd.
	 *
	 * @param cStr  Buffer holding the conversion data; ownership
	 *              passes to this object.
	 * @param cSize Size (in bytes) of the conversion data.
	 */
	StdFreeConversion(char * cStr, size_t const cSize);
	virtual ~StdFreeConversion();
};

/** @brief Conversion w/ std::string holder.
 *
 * @note Not copyable: the supplied string gets delete'd
 * (presumably by the destructor), so a copy holding the same
 * pointer would lead to it being deleted twice.
 */
class StringConversion: public Conversion
{
private:
	std::string * delObject_;

	/* Copying is disabled: declared private and intentionally left
	 * undefined, so accidental copies fail at compile or link time. */
	StringConversion(StringConversion const &);
	StringConversion & operator=(StringConversion const &);
public:
	/** @brief Constructor. str will be delete'd.
	 *
	 * @param str String holding the conversion data; ownership
	 *            passes to this object.
	 */
	StringConversion(std::string * str);
	virtual ~StringConversion();
};


/** @brief Factory for Conversions.
 *
 * Abstract base class: the byte-buffer make() is pure virtual and
 * has to be supplied by each concrete converter.
 */
class Converter
{
public:
	/** @brief Error codes for exceptions. */
	enum ErrorCode
	{
		EncUnknown_,
		ConversionErr_,
		UnknownErr_
	};

	/** @brief Exceptions for this class. */
	typedef CodeException<ErrorCode> Exception;

	/** @brief Converter base constructor.
	 *
	 * @param inEnc  Input encoding.
	 * @param outEnc Output encoding.
	 * @param sloppy Sloppy conversion flag; defaults to false.
	 */
	Converter(const std::string & inEnc, const std::string & outEnc, const bool & sloppy = false);
	virtual ~Converter();

	/** @brief Convert from byte buffer.
	 *
	 * @param buf     Start of the input bytes.
	 * @param bufSize Number of input bytes.
	 *
	 * @returns Pointer to the resulting Conversion.
	 */
	virtual const Conversion * make(const char * buf, size_t bufSize) const = 0;

	/** @brief Convert from C-String. */
	const Conversion * make(const char * cStr) const;

	/** @brief Convert from std::string. */
	const Conversion * make(const std::string & str) const;

	/** @brief Get id (informational). */
	std::string getID() const;

protected:
	/** @name Input/output encoding and sloppy conversion flag.
	 * @{ */
	const std::string inEnc_;
	const std::string outEnc_;
	const bool sloppy_;
	/** @} */
};


#ifndef WIN32
/** @brief Converter based on librecode (see there). */
class LibRecodeConverter: public Converter
{
private:
	RECODE_OUTER outer_;
	RECODE_REQUEST request_;

	/** @brief Verbatim from librecode. */
	static void guarantee_nul_terminator(RECODE_TASK task);

public:
	/** @brief Constructor; this supports sloppy encoding if desired. */
	LibRecodeConverter(std::string const & inEnc, std::string const & outEnc, bool const & sloppy=false);
	virtual ~LibRecodeConverter();
	Conversion const * make(char const * const buf, size_t const bufSize) const;
};
#endif


/** @brief Character set converter, using libc's "iconv" directly. */
class IConvConverter: public Converter
{
private:
	iconv_t conversion_;

public:
	/** @brief Constructor; this does not support sloppy encoding. */
	IConvConverter(std::string const & inEnc, std::string const & outEnc, const bool & sloppy = false);
	virtual ~IConvConverter();

	Conversion const * make(char const * const buf, size_t const bufSize) const;
};


/** @brief Converter for URL-Encoding. */
class URLConverter: public Converter
{
public:
	/** @brief Constructor; inEnc and outEnc are fixed by the encode flag. */
	URLConverter(bool const encode=true);

	Conversion const * make(char const * const buf, size_t const bufSize) const;

	/** @note All below is copied 1-1 from the from URL class. */
private:
	static const unsigned char MinNum = 0x30;
	static const unsigned char MaxNum = 0x39;
	static const unsigned char MinAlp = 0x41;
	static const unsigned char MaxAlp = 0x46;
	static const unsigned char NumMod = 0x30;
	static const unsigned char AlpMod = 0x37;
	static const unsigned char MinAlp2 = 0x61;
	static const unsigned char MaxAlp2 = 0x66;
	static const unsigned char AlpMod2 = 0x57;


	/** @brief Convert an ASCII representation of a hex digit into its byte value e.g. : "b" = '00001011" = 11.
	 *
	 * @param in the hex digit ASCII code
	 *
	 * @returns the byte value of the hex digit
	 */
	static unsigned char number(unsigned char in);

public:
	/**@brief Encode an URL string.
	 *
	 * The following rules are applied in the conversion:
	 * The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9"
	 * remain the same.
	 * The special characters ".", "-", "*", and "_" remain the same.
	 * The space character " " is converted into a plus sign "+".
	 * The other special characters will be converted in a sequence of the form "%xy"
	 * which epresenting a byte where xy is the two-digit hexadecimal representation of
	 * the 8 bits.
	 *
	 * @param url the URL to encode
	 *
	 * @returns the encoded URL
	 */
	static std::string encode(std::string const & url);

	/** @brief Decode an URL encoded string.
	 *
	 * The following rules are applied in the conversion:
	 * The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9"
	 * remain the same.
	 * The special characters ".", "-", "*", and "_" remain the same.
	 * The plus sign "+" is converted into a space character " ".
	 * A sequence of the form "%xy" will be treated as representing a byte where xy is
	 * the two-digit hexadecimal representation of the 8 bits.
	 *
	 * @param url the encoded URL string
	 *
	 * @returns the decoded URL
	 */
	static std::string decode(std::string const & url);
};


/** @brief Converter for Base64 encoding, using CommonCpp functions. */
class Cpp2Base64Converter: public Converter
{
public:
	/** @brief Constructor; inEnc and outEnc are fixed by the encode flag. */
	Cpp2Base64Converter(bool const encode=true);
	Conversion const * make(char const * const buf, size_t const bufSize) const;
};


/** @brief Converter charEncoding -> rfc2047 (MIME). */
class Rfc2047Converter: public Converter
{
public:
	/** @brief Constructor; inEnc and outEnc are fixed by the encode flag. */
	Rfc2047Converter(std::string const & inCharEnc);
	Conversion const * make(char const * const buf, size_t const bufSize) const;
};


/** @brief Converter UTF-8 <-> rfc3490 (IDN). */
class Rfc3490Utf8Converter: public Converter
{
public:
	/** @brief Constructor; inEnc and outEnc are fixed by the encode flag. */
	Rfc3490Utf8Converter(bool const encode=true);
	Conversion const * make(char const * const buf, size_t const bufSize) const;
};


/** @brief Converter from an-char-encoded string with randomly placed email-adresse -> the same string w/ domain parts magically replaced with IDN-encoding. */
class IdnEMailConverter: public Converter
{
public:
	/** @brief Constructor; inEnc and outEnc are fixed by the encode flag. */
	IdnEMailConverter(std::string const & inEnc);
	Conversion const * make(char const * const buf, size_t const bufSize) const;
};


/** @brief Meta converter class: converter switcher and generic interface.
 *
 * @note NOTE(review): this class holds a raw Converter pointer and
 * declares a destructor, but copying is not disabled -- confirm that
 * instances are never copied.
 */
class Recoder
{
public:
	/** @brief Constructor; sloppy is only supported by some converters.
	 *
	 * @param inEnc  Input encoding.
	 * @param outEnc Output encoding.
	 * @param sloppy Sloppy conversion flag; defaults to false.
	 */
	Recoder(const std::string & inEnc, const std::string & outEnc, const bool & sloppy = false);
	~Recoder();

	/** @name Run conversions.
	 *
	 * - The non-factory "run" methods suit temporary/small conversions.
	 * - The factory "make" methods suit results that are kept, or big
	 *   conversions where you want to optimize your code.
	 *
	 * @note Converters are factories for Conversions. Whenever a
	 * method here returns a Conversion pointer, the caller is
	 * responsible for deleting it.
	 *
	 * @{ */
	const Conversion * make(const char * src, size_t srcSize) const;
	const Conversion * make(const char * src) const;

	std::string run(const char * buf, size_t bufSize) const;
	std::string run(const char * src) const;
	std::string run(const std::string & src) const;
	/** @} */

private:
	/* Presumably tests whether enc is one of the encodings named in
	 * encs -- confirm against the implementation. */
	bool isEnc(const std::string & enc, const std::string & encs) const;

	/* The converter this recoder holds. */
	const Converter * converter_;
};


/** @brief Cache for Recoder instances.
 *
 * @note Prefer this over repeatedly creating temporary Recoder
 * instances: creating them is expensive (at least for librecode),
 * and librecode converters have a rather huge memory leak.
 */
class RecoderCache
{
public:
	/** @brief Get a Recoder for the given encodings and sloppy flag.
	 *
	 * The parameters mirror those of the Recoder constructor.
	 *
	 * @returns Reference to a Recoder; the caller does not own it.
	 */
	static const Recoder & get(const std::string & inEnc, const std::string & outEnc, const bool & sloppy = false);
	~RecoderCache();

private:
	/* Recoders by string key; how the key is built is not visible
	 * in this header. */
	typedef std::map<std::string, Recoder *> RecoderMap;
	RecoderMap cache_;

	/* Presumably the single cache object used by get() -- confirm
	 * against the implementation. */
	static RecoderCache * instance_;
};

}}
#endif
// libui-utilcpp-dev 1.8.5-1build3 / usr / include / ui-utilcpp / Recoder.hpp