/usr/include/sdsl/coder_comma.hpp is in libsdsl-dev 2.0.3-4.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 | /* sdsl - succinct data structures library
Copyright (C) 2009 Simon Gog
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/ .
*/
/*! \file coder_comma.hpp
\brief coder_comma.hpp contains the class sdsl::coder::comma
\author Uwe Baier
*/
#ifndef SDSL_CODER_COMMA_INCLUDED
#define SDSL_CODER_COMMA_INCLUDED
#include <sdsl/bits.hpp>
#include <array>
#include <math.h>
namespace sdsl {
namespace coder {
//! A class to encode and decode between comma code and binary code.
/*! \author Uwe Baier
*
* Comma coding works as the following:
* First of all, comma coding needs a parameter t_width which indicates
* how big a encoded digit will be (in bits), let's say t_width = 2.
* By use of t_width one can calculate a base for encoding, in detail
* this means
* base = 2^t_width - 1
* now, given any number it is encoded as the follows: The number gets displayed
* in the calculated base, and each digit of the number is saved with t_width bits.
* To indicate the end of the number, a termination digit is used (namely this is
* the value base).
* Example:
* t_width = 2 => base = 2^2 - 1 = 3
* Value to be encoded: 15
* 15 (base 10) = 120 (base 3)
* Encoded value: 120 (plus termination digit) = 01 10 00 11 in binary
* (last digit is termination digit)
*
* \tparam t_width Width of one digit used in comma code
*/
template<uint8_t t_width = 2>
class comma {
private:
static_assert(t_width > 1 && t_width <= 32,
"comma coder: Width must be in interval [2,32]");
//base in which numbers are coded
static const uint32_t base = (1 << t_width) - 1;
//table needed for computation of encoding lengths.
//table contains entries of the kind (index, base^index)
//to know how much digits a number needs to be encoded.
static const size_t codelentbllen = ceil(64 / log2(base));
static std::array<uint64_t, codelentbllen> codelentbl;
//utility function to set up codelen table
static std::array<uint64_t, codelentbllen> createCodeLenTbl();
//helper function to encode a single number without
//termination digit
static void encode_in_base(uint64_t x, uint64_t *& z,
uint8_t& offset);
public:
typedef uint64_t size_type;
static const uint8_t min_codeword_length =
t_width; //0 needs t_width bits as termination
//// ENCODING /////////////////////////////////////////////////
//! Get the number of bits that are necessary to encode
// the value w in comma code.
/*! \param w 64bit int to get the length of its comma encoding.
*/
static uint8_t encoding_length(uint64_t w);
//! Encode one positive integer x to an int_vector
// at bit position start_idx.
/* \param x Positive integer to encode.
\param z Raw data of vector to write the encoded form of x.
\param start_idx Beginning bit index to write the encoded form ox x in z.
*/
static void encode(uint64_t x, uint64_t*& z, uint8_t& offset);
//! Encode integers contained in vector v into vector z
/* \param v vector containing positive integer values
\param z vector to put the encoded values
*/
template<class int_vector>
static bool encode(const int_vector& v, int_vector& z);
//// DECODING /////////////////////////////////////////////////
//! Decode n comma encoded values beginning at start_idx
// in the bitstring "data"
/* \param data Bitstring
\param start_idx Starting index of the decoding.
\param n Number of values to decode from the bitstring.
\param it Iterator to store the values.
*/
template<bool t_sumup, bool t_inc,class t_iter>
static uint64_t decode(const uint64_t* data,
const size_type start_idx, size_type n,
t_iter it=(t_iter)nullptr);
//! Decode n comma gamma encoded integers
// beginning at start_idx in the bitstring "data"
// and return the sum of these values.
/*! \param data Pointer to the beginning
of the comma encoded bitstring.
\param start_idx Index of the first bit
to encode the values from.
\param n Number of values to decode from the bitstring.
Attention: There have to be at least n encoded
values in the bitstring.
*/
static uint64_t decode_prefix_sum(const uint64_t* data,
const size_type start_idx, size_type n);
//! Decode n comma gamma encoded integers
// beginning at start_idx ending at end_idx (exclusive)
// in the bitstring "data"
// and return the sum of these values.
/*! \param data Pointer to the beginning
of the comma encoded bitstring.
\param start_idx Index of the first bit
to encode the values from.
\param end_idx Index of the last bit
to encode the values from.
\param n Number of values to decode from the bitstring.
Attention: There have to be at least n encoded
values in the bitstring.
*/
static uint64_t decode_prefix_sum(const uint64_t* data,
const size_type start_idx, const size_type end_idx,
size_type n);
//! Decode vector z containing comma encoded integers
// and store them in vector v.
/*! \param z vector that contains encoded integers.
\param v vector to store the decoded integers
*/
template<class int_vector>
static bool decode(const int_vector& z, int_vector& v);
//interface needs this function for whatever :>
template<class int_vector>
static uint64_t* raw_data(int_vector& v) {
return v.m_data;
}
};
//// IMPLEMENTATION ///////////////////////////////////////////////////////////
//// CODELENGTH TABLE SETUP ///////////////////////////////
template<uint8_t t_width>
std::array<uint64_t, comma<t_width>::codelentbllen> comma<t_width>::codelentbl =
createCodeLenTbl();
template<uint8_t t_width>
std::array<uint64_t, comma<t_width>::codelentbllen> comma<t_width>::createCodeLenTbl() {
std::array<uint64_t, codelentbllen> tbl;
uint64_t n = 1;
for (size_t i = 0; i < codelentbllen; i++) {
tbl[i] = n;
n = (n << t_width) - n; //n = n * base
}
return tbl;
}
//// Encoding /////////////////////////////////////////////
template<uint8_t t_width>
inline uint8_t comma<t_width>::encoding_length(uint64_t w) {
//use function table and binary search to determine the number of digits
//needed to encode w in given base.
uint8_t numdigits =
std::upper_bound(codelentbl.begin(), codelentbl.end(), w)
- codelentbl.begin();
//finally calculate length.
//Don't forget termination character on calculations ;)
return (numdigits + 1) * t_width;
}
template<uint8_t t_width>
void comma<t_width>::encode_in_base(uint64_t x, uint64_t *& z,
uint8_t& offset) {
if (x) {
uint32_t digit = x % base; //get next digit
//encode digits with higher order
encode_in_base(x / base, z, offset);
//and write own digit
bits::write_int_and_move(z, digit, offset, t_width);
}
}
template<uint8_t t_width>
inline void comma<t_width>::encode(uint64_t x, uint64_t*& z, uint8_t& offset) {
//encode x itself
encode_in_base(x, z, offset);
//and append the termination digit
bits::write_int_and_move(z, base, offset, t_width);
}
template<uint8_t t_width>
template<class int_vector>
bool comma<t_width>::encode(const int_vector& v, int_vector& z) {
//first, find out how much bits vector z needs to save values
typedef typename int_vector::size_type size_type;
size_type z_bit_size = 0;
for (typename int_vector::const_iterator it = v.begin(), end = v.end();
it != end; ++it) {
z_bit_size += encoding_length(*it);
}
//trim vector z to correct size
z.width(v.width());
z.bit_resize(z_bit_size); //for future may check if resizing works
//iterate again and save values in z
uint64_t* z_data = z.m_data;
uint8_t offset = 0;
for (typename int_vector::const_iterator it = v.begin(), end = v.end();
it != end; ++it) {
encode(*it, z_data, offset);
}
return true;
}
//// DECODING /////////////////////////////////////////////
template<uint8_t t_width>
template<bool t_sumup, bool t_inc, class t_iter>
inline uint64_t comma<t_width>::decode(const uint64_t* data,
const size_type start_idx, size_type n, t_iter it) {
data += (start_idx >> 6); //jump to byte offset
uint8_t offset = start_idx & 0x3F; //and calculate bit offset
uint64_t value = 0;
for (size_type i = 0; i < n; i++) {
//read next value
uint64_t v = 0;
for (uint32_t digit = (uint32_t)bits::read_int_and_move(data, offset, t_width); //read first digit
digit != base; //while digit is not the terminating digit
v = (v << t_width) - v + digit, //v = v * base + digit
digit = (uint32_t)bits::read_int_and_move(data, offset, t_width)); //and read next digit
//now decide how to handle value
value = (t_sumup) ? value + v : v;
if (t_inc) *(it++) = value;
}
return value;
}
template<uint8_t t_width>
uint64_t comma<t_width>::decode_prefix_sum(const uint64_t* data,
const size_type start_idx, size_type n) {
//easiest seems to be to use already build function decode...
return decode<true,false,int *>(data, start_idx, n);
//Note for above: 3rd template parameter ca be any pntr except void *
}
template<uint8_t t_width>
uint64_t comma<t_width>::decode_prefix_sum(const uint64_t* data,
const size_type start_idx,
SDSL_UNUSED const size_type end_idx, size_type n) {
//end index does not change anything here...
return decode_prefix_sum(data, start_idx, n);
}
template<uint8_t t_width>
template<class int_vector>
bool comma<t_width>::decode(const int_vector& z, int_vector& v) {
//check if bit size is dividable through t_width.
if (z.bit_size() % t_width != 0) return false;
//calculate num of overall digits in z (including terminating digit)
uint64_t numOfDigits = z.bit_size() / t_width;
//iteration vars for z vector
const uint64_t *z_data = z.data();
uint8_t z_offset = 0;
//utility to count number of entries in z, and last read digit
uint32_t digit = base;
typename int_vector::size_type n = 0;
//iterate over all digits. each time a termination digit is
// detected, a encoded number in vector ends.
while (numOfDigits--) {
digit = (uint32_t)bits::read_int_and_move(z_data, z_offset, t_width);
if (digit == base) n++; //termination digit detected
}
//also, ensure last read digit was a termination digit
if (digit != base) return false;
//resize vector v
v.width(z.width());
v.resize(n);
//and finally decode and save result in v
decode<false, true>(z.data(), 0, n, v.begin());
return true;
}
} //end of namespace coder
} //end of namespace sdsl
#endif
|