/usr/include/openturns/simd.h is in libopenturns-dev 1.2-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#ifndef TUTILS_SIMD_H
#define TUTILS_SIMD_H
#include <cstddef>
/* Quick and dirty patch: the SSE2 code path implemented in this file does not work on the x86_64 architecture, so __SSE2__ is undefined just below to force the portable scalar code path. */
#undef __SSE2__
#ifdef __SSE2__
#include <emmintrin.h>
#endif
#include <stdint.h>
namespace tutils
{
/*
 * Forward declarations.
 *
 * The class is announced first so that the free functions below can be
 * declared with internal linkage (static inline) before the class body
 * names them as friends; their definitions follow the class.
 */
class uint64v2_t;
/* Lane-by-lane bitwise AND, OR and XOR of two values. */
static inline uint64v2_t operator&(uint64v2_t const& x, uint64v2_t const& y);
static inline uint64v2_t operator|(uint64v2_t const& x, uint64v2_t const& y);
static inline uint64v2_t operator^(uint64v2_t const& x, uint64v2_t const& y);
/* Shift each 64-bit lane independently by s bits. */
static inline uint64v2_t operator>>(uint64v2_t const& x, int s);
static inline uint64v2_t operator<<(uint64v2_t const& x, int s);
/* Exchange the two 64-bit lanes. */
static inline uint64v2_t swap64(uint64v2_t const& x);
/* Shift the pair, taken as one 128-bit quantity (lane 1 on top), right by S bytes. */
template <int S>
static inline uint64v2_t rshift128(uint64v2_t const& x);
/* Shift the pair, taken as one 128-bit quantity (lane 1 on top), left by S bytes. */
template <int S>
static inline uint64v2_t lshift_byte(uint64v2_t const& x);
/*
 * Calls _mm_empty() when the SSE2 code path is compiled in.
 * In a build without __SSE2__ the function body is empty.
 */
inline void simd_empty()
{
#if defined(__SSE2__)
  _mm_empty();
#else
  /* Scalar build: nothing to do. */
#endif
}
/*
 * Pair of unsigned 64-bit lanes supporting lane-wise bitwise operations.
 *
 * With __SSE2__ defined the pair is stored in a single __m128i and the
 * operators map onto SSE2 intrinsics; otherwise it is stored in a plain
 * array of two uint64_t and the operators are written out per lane.
 * Lane 0 is addressed by index 0 and lane 1 by index 1.
 */
class uint64v2_t
{
public:
  /* Default constructor: performs no initialization of the lanes. */
  uint64v2_t() {}

  /* Builds a pair with u1 in lane 1 and u0 in lane 0. */
  uint64v2_t(uint64_t u1, uint64_t u0) {
    set(u1, u0);
  }

  /* Overwrites both lanes: u1 goes to lane 1, u0 to lane 0. */
  void set(uint64_t u1, uint64_t u0) {
    (*this)[0] = u0;
    (*this)[1] = u1;
  }

  /* Read-only access to lane i (i is not range-checked; valid values are 0 and 1). */
  uint64_t const& operator[](std::size_t i) const {
#ifdef __SSE2__
    return reinterpret_cast<uint64_t const*>(&vec_)[i];
#else
    return u64_[i];
#endif
  }

  /* Mutable access to lane i (i is not range-checked; valid values are 0 and 1). */
  uint64_t& operator[](std::size_t i) {
#ifdef __SSE2__
    return reinterpret_cast<uint64_t*>(&vec_)[i];
#else
    return u64_[i];
#endif
  }

  /* Lane-wise AND with y, in place. Returns *this. */
  uint64v2_t& operator&=(uint64v2_t const& y) {
#ifdef __SSE2__
    vec_ = _mm_and_si128(vec_, y.vec_);
#else
    u64_[0] &= y.u64_[0];
    u64_[1] &= y.u64_[1];
#endif
    return *this;
  }

  /* Lane-wise OR with y, in place. Returns *this. */
  uint64v2_t& operator|=(uint64v2_t const& y) {
#ifdef __SSE2__
    vec_ = _mm_or_si128(vec_, y.vec_);
#else
    u64_[0] |= y.u64_[0];
    u64_[1] |= y.u64_[1];
#endif
    return *this;
  }

  /* Lane-wise XOR with y, in place. Returns *this. */
  uint64v2_t& operator^=(uint64v2_t const& y) {
#ifdef __SSE2__
    vec_ = _mm_xor_si128(vec_, y.vec_);
#else
    u64_[0] ^= y.u64_[0];
    u64_[1] ^= y.u64_[1];
#endif
    return *this;
  }

  /*
   * Logical right shift of each lane by s bits, in place. Returns *this.
   * A count outside [0, 63] clears both lanes: shifting a uint64_t by such
   * a count is undefined in C++, while the SSE2 intrinsic yields zero for
   * counts above 63, so the scalar path is made to agree with it.
   */
  uint64v2_t& operator>>=(int s) {
#ifdef __SSE2__
    vec_ = _mm_srli_epi64(vec_, s);
#else
    /* The unsigned cast folds negative counts into the out-of-range case. */
    if (static_cast<unsigned>(s) < 64u) {
      u64_[0] >>= s;
      u64_[1] >>= s;
    } else {
      u64_[0] = 0;
      u64_[1] = 0;
    }
#endif
    return *this;
  }

  /*
   * Left shift of each lane by s bits, in place. Returns *this.
   * A count outside [0, 63] clears both lanes, for the same reason as in
   * operator>>=.
   */
  uint64v2_t& operator<<=(int s) {
#ifdef __SSE2__
    vec_ = _mm_slli_epi64(vec_, s);
#else
    /* The unsigned cast folds negative counts into the out-of-range case. */
    if (static_cast<unsigned>(s) < 64u) {
      u64_[0] <<= s;
      u64_[1] <<= s;
    } else {
      u64_[0] = 0;
      u64_[1] = 0;
    }
#endif
    return *this;
  }

  /* Free functions that need direct access to the private storage. */
  friend uint64v2_t operator&(uint64v2_t const& x, uint64v2_t const& y);
  friend uint64v2_t operator|(uint64v2_t const& x, uint64v2_t const& y);
  friend uint64v2_t operator^(uint64v2_t const& x, uint64v2_t const& y);
  friend uint64v2_t operator>>(uint64v2_t const& x, int s);
  friend uint64v2_t operator<<(uint64v2_t const& x, int s);
  friend uint64v2_t swap64(uint64v2_t const& x);
  template <int S>
  friend uint64v2_t rshift128(uint64v2_t const& x);
  template <int S>
  friend uint64v2_t lshift_byte(uint64v2_t const& x);

private:
#ifdef __SSE2__
  /* Both lanes packed in one SSE2 register-sized value. */
  __m128i vec_;
  /* Private converting constructor used by the friend functions to wrap an intrinsic result. */
  uint64v2_t(__m128i const& y) : vec_(y) {
  }
#else
  /* Scalar storage: u64_[0] is lane 0, u64_[1] is lane 1. */
  uint64_t u64_[2];
#endif
};
/*
 * Lane-wise AND of x and y, returned as a new value.
 * Neither operand is modified.
 */
uint64v2_t operator&(uint64v2_t const& x, uint64v2_t const& y) {
  /* Copy the left operand and fold the right one in with the member operator. */
  uint64v2_t result(x);
  result &= y;
  return result;
}
/*
 * Lane-wise OR of x and y, returned as a new value.
 * Neither operand is modified.
 */
uint64v2_t operator|(uint64v2_t const& x, uint64v2_t const& y) {
  /* Copy the left operand and fold the right one in with the member operator. */
  uint64v2_t result(x);
  result |= y;
  return result;
}
/*
 * Lane-wise XOR of x and y, returned as a new value.
 * Neither operand is modified.
 */
uint64v2_t operator^(uint64v2_t const& x, uint64v2_t const& y) {
  /* Copy the left operand and fold the right one in with the member operator. */
  uint64v2_t result(x);
  result ^= y;
  return result;
}
/*
 * Logical right shift of each 64-bit lane of x by s bits, returned as a new
 * value.
 *
 * A count outside [0, 63] produces a value with both lanes cleared: shifting
 * a uint64_t by such a count is undefined in C++, while the SSE2 intrinsic
 * yields zero for counts above 63, so the scalar path is made to agree.
 */
uint64v2_t operator>>(uint64v2_t const& x, int s) {
#ifdef __SSE2__
  return _mm_srli_epi64(x.vec_, s);
#else
  /* The unsigned cast folds negative counts into the out-of-range case. */
  if (static_cast<unsigned>(s) >= 64u) {
    return uint64v2_t(0, 0);
  }
  return uint64v2_t(x[1] >> s, x[0] >> s);
#endif
}
/*
 * Left shift of each 64-bit lane of x by s bits, returned as a new value.
 *
 * A count outside [0, 63] produces a value with both lanes cleared: shifting
 * a uint64_t by such a count is undefined in C++, while the SSE2 intrinsic
 * yields zero for counts above 63, so the scalar path is made to agree.
 */
uint64v2_t operator<<(uint64v2_t const& x, int s) {
#ifdef __SSE2__
  return _mm_slli_epi64(x.vec_, s);
#else
  /* The unsigned cast folds negative counts into the out-of-range case. */
  if (static_cast<unsigned>(s) >= 64u) {
    return uint64v2_t(0, 0);
  }
  return uint64v2_t(x[1] << s, x[0] << s);
#endif
}
/*
 * Returns a copy of x with its two 64-bit lanes exchanged:
 * lane 0 of the result is x[1] and lane 1 of the result is x[0].
 */
uint64v2_t swap64(uint64v2_t const& x) {
#ifndef __SSE2__
  uint64v2_t swapped;
  /* set() takes (lane 1, lane 0), so passing (x[0], x[1]) crosses the lanes. */
  swapped.set(x[0], x[1]);
  return swapped;
#else
  /* 0x4e selects 32-bit words 2,3,0,1, i.e. swaps the 64-bit halves. */
  return _mm_shuffle_epi32(x.vec_, 0x4e);
#endif
}
/*
 * Shifts x, taken as a single 128-bit quantity with lane 1 as the upper
 * half, right by S bytes, filling with zeros.
 *
 * S is a compile-time byte count. The scalar path handles every value
 * explicitly, because a 64-bit shift by 64 or more bits is undefined in C++:
 *   - S == 0      : x is returned unchanged;
 *   - 1 <= S <= 7 : bits cross from lane 1 into lane 0;
 *   - 8 <= S <= 15: lane 1 drops into lane 0, shifted by the remainder;
 *   - otherwise   : the result is all zeros, as _mm_srli_si128 gives for
 *                   byte counts above 15.
 */
template <int S>
uint64v2_t rshift128(uint64v2_t const& x) {
#ifdef __SSE2__
  return _mm_srli_si128(x.vec_, S);
#else
  /* The unsigned cast folds negative S into the out-of-range case. */
  if (static_cast<unsigned>(S) >= 16u) {
    return uint64v2_t(0, 0);
  }
  if (S == 0) {
    return x;
  }
  /* Bit shift within a lane, always in [0, 56]. */
  int const bits(8 * (S & 7));
  if (S >= 8) {
    return uint64v2_t(0, x[1] >> bits);
  }
  /* Here bits is in [8, 56]; the mask only keeps dead instantiations well-formed. */
  int const carry((64 - bits) & 63);
  return uint64v2_t(x[1] >> bits, (x[1] << carry) | (x[0] >> bits));
#endif
}
/*
 * Shifts x, taken as a single 128-bit quantity with lane 1 as the upper
 * half, left by S bytes, filling with zeros.
 *
 * S is a compile-time byte count. The scalar path handles every value
 * explicitly, because a 64-bit shift by 64 or more bits is undefined in C++:
 *   - S == 0      : x is returned unchanged;
 *   - 1 <= S <= 7 : bits cross from lane 0 into lane 1;
 *   - 8 <= S <= 15: lane 0 moves up into lane 1, shifted by the remainder;
 *   - otherwise   : the result is all zeros, as _mm_slli_si128 gives for
 *                   byte counts above 15.
 */
template <int S>
uint64v2_t lshift_byte(uint64v2_t const& x) {
#ifdef __SSE2__
  return _mm_slli_si128(x.vec_, S);
#else
  /* The unsigned cast folds negative S into the out-of-range case. */
  if (static_cast<unsigned>(S) >= 16u) {
    return uint64v2_t(0, 0);
  }
  if (S == 0) {
    return x;
  }
  /* Bit shift within a lane, always in [0, 56]. */
  int const bits(8 * (S & 7));
  if (S >= 8) {
    return uint64v2_t(x[0] << bits, 0);
  }
  /* Here bits is in [8, 56]; the mask only keeps dead instantiations well-formed. */
  int const carry((64 - bits) & 63);
  return uint64v2_t((x[1] << bits) | (x[0] >> carry), x[0] << bits);
#endif
}
}
#endif /* TUTILS_SIMD_H */