/usr/include/htslib/cram/rANS_byte.h is in libhts-private-dev 1.7-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 | /* rans_byte.h originally from https://github.com/rygorous/ryg_rans
*
* This is a public-domain implementation of several rANS variants. rANS is an
* entropy coder from the ANS family, as described in Jarek Duda's paper
* "Asymmetric numeral systems" (http://arxiv.org/abs/1311.2540).
*/
/*-------------------------------------------------------------------------- */
// Simple byte-aligned rANS encoder/decoder - public domain - Fabian 'ryg' Giesen 2014
//
// Not intended to be "industrial strength"; just meant to illustrate the general
// idea.
#ifndef RANS_BYTE_HEADER
#define RANS_BYTE_HEADER
#include <stdint.h>
#ifdef assert
#define RansAssert assert
#else
#define RansAssert(x)
#endif
// READ ME FIRST:
//
// This is designed like a typical arithmetic coder API, but there's three
// twists you absolutely should be aware of before you start hacking:
//
// 1. You need to encode data in *reverse* - last symbol first. rANS works
// like a stack: last in, first out.
// 2. Likewise, the encoder outputs bytes *in reverse* - that is, you give
// it a pointer to the *end* of your buffer (exclusive), and it will
// slowly move towards the beginning as more bytes are emitted.
// 3. Unlike basically any other entropy coder implementation you might
// have used, you can interleave data from multiple independent rANS
// encoders into the same bytestream without any extra signaling;
// you can also just write some bytes by yourself in the middle if
// you want to. This is in addition to the usual arithmetic encoder
// property of being able to switch models on the fly. Writing raw
// bytes can be useful when you have some data that you know is
// incompressible, and is cheaper than going through the rANS encode
// function. Using multiple rANS coders on the same byte stream wastes
// a few bytes compared to using just one, but execution of two
// independent encoders can happen in parallel on superscalar and
// Out-of-Order CPUs, so this can be *much* faster in tight decoding
// loops.
//
// This is why all the rANS functions take the write pointer as an
// argument instead of just storing it in some context struct.
// --------------------------------------------------------------------------
// L ('l' in the paper) is the lower bound of our normalization interval.
// Between this and our byte-aligned emission, we use 31 (not 32!) bits.
// This is done intentionally because exact reciprocals for 31-bit uints
// fit in 32-bit uints: this permits some optimizations during encoding.
#define RANS_BYTE_L (1u << 23) // lower bound of our normalization interval
// State for a rANS encoder. Yep, that's all there is to it.
typedef uint32_t RansState;
// Initialize a rANS encoder.
static inline void RansEncInit(RansState* r)
{
*r = RANS_BYTE_L;
}
// Renormalize the encoder. Internal function.
static inline RansState RansEncRenorm(RansState x, uint8_t** pptr, uint32_t freq, uint32_t scale_bits)
{
uint32_t x_max = ((RANS_BYTE_L >> scale_bits) << 8) * freq; // this turns into a shift.
if (x >= x_max) {
uint8_t* ptr = *pptr;
do {
*--ptr = (uint8_t) (x & 0xff);
x >>= 8;
} while (x >= x_max);
*pptr = ptr;
}
return x;
}
// Encodes a single symbol with range start "start" and frequency "freq".
// All frequencies are assumed to sum to "1 << scale_bits", and the
// resulting bytes get written to ptr (which is updated).
//
// NOTE: With rANS, you need to encode symbols in *reverse order*, i.e. from
// beginning to end! Likewise, the output bytestream is written *backwards*:
// ptr starts pointing at the end of the output buffer and keeps decrementing.
static inline void RansEncPut(RansState* r, uint8_t** pptr, uint32_t start, uint32_t freq, uint32_t scale_bits)
{
// renormalize
RansState x = RansEncRenorm(*r, pptr, freq, scale_bits);
// x = C(s,x)
*r = ((x / freq) << scale_bits) + (x % freq) + start;
}
// Flushes the rANS encoder.
static inline void RansEncFlush(RansState* r, uint8_t** pptr)
{
uint32_t x = *r;
uint8_t* ptr = *pptr;
ptr -= 4;
ptr[0] = (uint8_t) (x >> 0);
ptr[1] = (uint8_t) (x >> 8);
ptr[2] = (uint8_t) (x >> 16);
ptr[3] = (uint8_t) (x >> 24);
*pptr = ptr;
}
// Initializes a rANS decoder.
// Unlike the encoder, the decoder works forwards as you'd expect.
static inline void RansDecInit(RansState* r, uint8_t** pptr)
{
uint32_t x;
uint8_t* ptr = *pptr;
x = ptr[0] << 0;
x |= ptr[1] << 8;
x |= ptr[2] << 16;
x |= ptr[3] << 24;
ptr += 4;
*pptr = ptr;
*r = x;
}
// Returns the current cumulative frequency (map it to a symbol yourself!)
static inline uint32_t RansDecGet(RansState* r, uint32_t scale_bits)
{
return *r & ((1u << scale_bits) - 1);
}
// Advances in the bit stream by "popping" a single symbol with range start
// "start" and frequency "freq". All frequencies are assumed to sum to "1 << scale_bits",
// and the resulting bytes get written to ptr (which is updated).
static inline void RansDecAdvance(RansState* r, uint8_t** pptr, uint32_t start, uint32_t freq, uint32_t scale_bits)
{
uint32_t mask = (1u << scale_bits) - 1;
// s, x = D(x)
uint32_t x = *r;
x = freq * (x >> scale_bits) + (x & mask) - start;
// renormalize
if (x < RANS_BYTE_L) {
uint8_t* ptr = *pptr;
do x = (x << 8) | *ptr++; while (x < RANS_BYTE_L);
*pptr = ptr;
}
*r = x;
}
// --------------------------------------------------------------------------
// That's all you need for a full encoder; below here are some utility
// functions with extra convenience or optimizations.
// Encoder symbol description
// This (admittedly odd) selection of parameters was chosen to make
// RansEncPutSymbol as cheap as possible.
typedef struct {
uint32_t x_max; // (Exclusive) upper bound of pre-normalization interval
uint32_t rcp_freq; // Fixed-point reciprocal frequency
uint32_t bias; // Bias
uint16_t cmpl_freq; // Complement of frequency: (1 << scale_bits) - freq
uint16_t rcp_shift; // Reciprocal shift
} RansEncSymbol;
// Decoder symbols are straightforward.
typedef struct {
uint16_t start; // Start of range.
uint16_t freq; // Symbol frequency.
} RansDecSymbol;
// Initializes an encoder symbol to start "start" and frequency "freq"
static inline void RansEncSymbolInit(RansEncSymbol* s, uint32_t start, uint32_t freq, uint32_t scale_bits)
{
RansAssert(scale_bits <= 16);
RansAssert(start <= (1u << scale_bits));
RansAssert(freq <= (1u << scale_bits) - start);
// Say M := 1 << scale_bits.
//
// The original encoder does:
// x_new = (x/freq)*M + start + (x%freq)
//
// The fast encoder does (schematically):
// q = mul_hi(x, rcp_freq) >> rcp_shift (division)
// r = x - q*freq (remainder)
// x_new = q*M + bias + r (new x)
// plugging in r into x_new yields:
// x_new = bias + x + q*(M - freq)
// =: bias + x + q*cmpl_freq (*)
//
// and we can just precompute cmpl_freq. Now we just need to
// set up our parameters such that the original encoder and
// the fast encoder agree.
s->x_max = ((RANS_BYTE_L >> scale_bits) << 8) * freq;
s->cmpl_freq = (uint16_t) ((1 << scale_bits) - freq);
if (freq < 2) {
// freq=0 symbols are never valid to encode, so it doesn't matter what
// we set our values to.
//
// freq=1 is tricky, since the reciprocal of 1 is 1; unfortunately,
// our fixed-point reciprocal approximation can only multiply by values
// smaller than 1.
//
// So we use the "next best thing": rcp_freq=0xffffffff, rcp_shift=0.
// This gives:
// q = mul_hi(x, rcp_freq) >> rcp_shift
// = mul_hi(x, (1<<32) - 1)) >> 0
// = floor(x - x/(2^32))
// = x - 1 if 1 <= x < 2^32
// and we know that x>0 (x=0 is never in a valid normalization interval).
//
// So we now need to choose the other parameters such that
// x_new = x*M + start
// plug it in:
// x*M + start (desired result)
// = bias + x + q*cmpl_freq (*)
// = bias + x + (x - 1)*(M - 1) (plug in q=x-1, cmpl_freq)
// = bias + 1 + (x - 1)*M
// = x*M + (bias + 1 - M)
//
// so we have start = bias + 1 - M, or equivalently
// bias = start + M - 1.
s->rcp_freq = ~0u;
s->rcp_shift = 0;
s->bias = start + (1 << scale_bits) - 1;
} else {
// Alverson, "Integer Division using reciprocals"
// shift=ceil(log2(freq))
uint32_t shift = 0;
while (freq > (1u << shift))
shift++;
s->rcp_freq = (uint32_t) (((1ull << (shift + 31)) + freq-1) / freq);
s->rcp_shift = shift - 1;
// With these values, 'q' is the correct quotient, so we
// have bias=start.
s->bias = start;
}
s->rcp_shift += 32; // Avoid the extra >>32 in RansEncPutSymbol
}
// Initialize a decoder symbol to start "start" and frequency "freq"
static inline void RansDecSymbolInit(RansDecSymbol* s, uint32_t start, uint32_t freq)
{
RansAssert(start <= (1 << 16));
RansAssert(freq <= (1 << 16) - start);
s->start = (uint16_t) start;
s->freq = (uint16_t) freq;
}
// Encodes a given symbol. This is faster than straight RansEnc since we can do
// multiplications instead of a divide.
//
// See RansEncSymbolInit for a description of how this works.
static inline void RansEncPutSymbol(RansState* r, uint8_t** pptr, RansEncSymbol const* sym)
{
RansAssert(sym->x_max != 0); // can't encode symbol with freq=0
// renormalize
uint32_t x = *r;
uint32_t x_max = sym->x_max;
if (x >= x_max) {
uint8_t* ptr = *pptr;
do {
*--ptr = (uint8_t) (x & 0xff);
x >>= 8;
} while (x >= x_max);
*pptr = ptr;
}
// x = C(s,x)
// NOTE: written this way so we get a 32-bit "multiply high" when
// available. If you're on a 64-bit platform with cheap multiplies
// (e.g. x64), just bake the +32 into rcp_shift.
//uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> 32) >> sym->rcp_shift;
// The extra >>32 has already been added to RansEncSymbolInit
uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> sym->rcp_shift);
*r = x + sym->bias + q * sym->cmpl_freq;
}
// Equivalent to RansDecAdvance that takes a symbol.
static inline void RansDecAdvanceSymbol(RansState* r, uint8_t** pptr, RansDecSymbol const* sym, uint32_t scale_bits)
{
RansDecAdvance(r, pptr, sym->start, sym->freq, scale_bits);
}
// Advances in the bit stream by "popping" a single symbol with range start
// "start" and frequency "freq". All frequencies are assumed to sum to "1 << scale_bits".
// No renormalization or output happens.
static inline void RansDecAdvanceStep(RansState* r, uint32_t start, uint32_t freq, uint32_t scale_bits)
{
uint32_t mask = (1u << scale_bits) - 1;
// s, x = D(x)
uint32_t x = *r;
*r = freq * (x >> scale_bits) + (x & mask) - start;
}
// Equivalent to RansDecAdvanceStep that takes a symbol.
static inline void RansDecAdvanceSymbolStep(RansState* r, RansDecSymbol const* sym, uint32_t scale_bits)
{
RansDecAdvanceStep(r, sym->start, sym->freq, scale_bits);
}
// Renormalize.
static inline void RansDecRenorm(RansState* r, uint8_t** pptr)
{
// renormalize
uint32_t x = *r;
if (x < RANS_BYTE_L) {
uint8_t* ptr = *pptr;
x = (x << 8) | *ptr++;
if (x < RANS_BYTE_L)
x = (x << 8) | *ptr++;
*pptr = ptr;
}
*r = x;
}
// Renormalize, with extra checks for falling off the end of the input.
static inline void RansDecRenormSafe(RansState* r, uint8_t** pptr, uint8_t *ptr_end)
{
uint32_t x = *r;
uint8_t* ptr = *pptr;
if (x >= RANS_BYTE_L || ptr >= ptr_end) return;
x = (x << 8) | *ptr++;
if (x < RANS_BYTE_L && ptr < ptr_end)
x = (x << 8) | *ptr++;
*pptr = ptr;
*r = x;
}
#endif // RANS_BYTE_HEADER
|