95#ifndef SIMD_VEC_MASK_IMPL_EMU_H_
96#define SIMD_VEC_MASK_IMPL_EMU_H_
101#include "intel/base_impl_intel64.H"
108#ifndef SIMDVEC_SANDBOX
117template <
typename T,
size_t SIMD_WIDTH>
120 Vec<T, SIMD_WIDTH> mask;
124 explicit SIMD_INLINE
Mask(
const Vec<T, SIMD_WIDTH> &x) : mask(x) {}
125 SIMD_INLINE
Mask(
const uint64_t x) : mask(
int2bits<T, SIMD_WIDTH>(x)) {}
126 explicit SIMD_INLINE
operator Vec<T, SIMD_WIDTH>()
const {
return mask; }
128 SIMD_INLINE
bool operator[](
const size_t i)
const
132 store(mask_array, mask);
133 return mask_array[i] != T(0);
142 void *
operator new(
size_t size) {
return aligned_malloc(SIMD_WIDTH, size); }
144 void *
operator new[](
size_t size) {
return aligned_malloc(SIMD_WIDTH, size); }
151#define EMULATE_SOP_NAME(OP, OP_NAME) \
152 template <typename T, size_t SIMD_WIDTH> \
153 static SIMD_INLINE Vec<T, SIMD_WIDTH> maskz_##OP_NAME( \
154 const Mask<T, SIMD_WIDTH> &k, const Vec<T, SIMD_WIDTH> &a) \
156 return mask::mask_ifelsezero(k, OP(a)); \
158 template <typename T, size_t SIMD_WIDTH> \
159 static SIMD_INLINE Vec<T, SIMD_WIDTH> mask_##OP_NAME( \
160 const Vec<T, SIMD_WIDTH> &src, const Mask<T, SIMD_WIDTH> &k, \
161 const Vec<T, SIMD_WIDTH> &a) \
163 return mask::mask_ifelse(k, OP(a), src); \
166#define EMULATE_SOP(OP) EMULATE_SOP_NAME(OP, OP)
168#define EMULATE_DOP_NAME(OP, OP_NAME) \
169 template <typename T, size_t SIMD_WIDTH> \
170 static SIMD_INLINE Vec<T, SIMD_WIDTH> maskz_##OP_NAME( \
171 const Mask<T, SIMD_WIDTH> &k, const Vec<T, SIMD_WIDTH> &a, \
172 const Vec<T, SIMD_WIDTH> &b) \
174 return mask::mask_ifelsezero(k, OP(a, b)); \
176 template <typename T, size_t SIMD_WIDTH> \
177 static SIMD_INLINE Vec<T, SIMD_WIDTH> mask_##OP_NAME( \
178 const Vec<T, SIMD_WIDTH> &src, const Mask<T, SIMD_WIDTH> &k, \
179 const Vec<T, SIMD_WIDTH> &a, const Vec<T, SIMD_WIDTH> &b) \
181 return mask::mask_ifelse(k, OP(a, b), src); \
184#define EMULATE_DOP(OP) EMULATE_DOP_NAME(OP, OP)
186template <
typename T,
size_t SIMD_WIDTH>
187static SIMD_INLINE Vec<T, SIMD_WIDTH> mask_ifelse(
188 const Mask<T, SIMD_WIDTH> &k,
const Vec<T, SIMD_WIDTH> &trueVal,
189 const Vec<T, SIMD_WIDTH> &falseVal)
191 return ifelse((Vec<T, SIMD_WIDTH>) k, trueVal, falseVal);
195template <
typename T,
size_t SIMD_WIDTH>
196static SIMD_INLINE Vec<T, SIMD_WIDTH> mask_ifelsezero(
197 const Mask<T, SIMD_WIDTH> &k,
const Vec<T, SIMD_WIDTH> &trueVal)
199 return bit_and((Vec<T, SIMD_WIDTH>) k, trueVal);
202template <
typename Tout,
typename Tin,
size_t SIMD_WIDTH>
203static SIMD_INLINE Mask<Tout, SIMD_WIDTH> reinterpret_mask(
204 const Mask<Tin, SIMD_WIDTH> &k)
206 static_assert(
sizeof(Tout) ==
sizeof(Tin),
"");
211template <
size_t SIMD_WIDTH>
212SIMD_INLINE Vec<Int, SIMD_WIDTH> maskz_cvts(
const Mask<Float, SIMD_WIDTH> &k,
213 const Vec<Float, SIMD_WIDTH> &a)
215 return mask::mask_ifelsezero(mask::reinterpret_mask<Int>(k),
219template <
size_t SIMD_WIDTH>
220SIMD_INLINE Vec<Int, SIMD_WIDTH> mask_cvts(
const Vec<Int, SIMD_WIDTH> &src,
221 const Mask<Float, SIMD_WIDTH> &k,
222 const Vec<Float, SIMD_WIDTH> &a)
228template <
size_t SIMD_WIDTH>
229SIMD_INLINE Vec<Float, SIMD_WIDTH> maskz_cvts(
const Mask<Int, SIMD_WIDTH> &k,
230 const Vec<Int, SIMD_WIDTH> &a)
232 return mask::mask_ifelsezero(mask::reinterpret_mask<Float>(k),
236template <
size_t SIMD_WIDTH>
237SIMD_INLINE Vec<Float, SIMD_WIDTH> mask_cvts(
const Vec<Float, SIMD_WIDTH> &src,
238 const Mask<Int, SIMD_WIDTH> &k,
239 const Vec<Int, SIMD_WIDTH> &a)
241 return mask::mask_ifelse(mask::reinterpret_mask<Float>(k),
// Reports whether the SIMD_WIDTH bytes starting at p lie inside one
// 4096-byte-aligned block of the address space.
//
// The page size is hard-coded to 4096 bytes. For larger power-of-two page
// sizes the answer is conservative: "true" still implies a single page,
// while "false" may be reported for an access that would not actually cross
// a page boundary.
//
// SIMD_WIDTH - number of bytes the access covers (must be non-zero)
// p          - address of the first byte accessed; it is never dereferenced
// returns    - true if the first and last byte share a 4096-byte block
template <size_t SIMD_WIDTH, typename T>
static SIMD_INLINE bool is_within_same_page(const T *const p)
{
  static_assert(SIMD_WIDTH > 0, "SIMD_WIDTH must be at least one byte");
  // Deliberately not called PAGE_SIZE: some platform headers define a macro
  // of that name, which would turn the declaration into a syntax error.
  constexpr uintptr_t assumedPageBytes = 4096;
  constexpr uintptr_t pageMask         = ~(assumedPageBytes - 1);
  const uintptr_t firstByte            = reinterpret_cast<uintptr_t>(p);
  // - 1 because the last byte touched is at p + SIMD_WIDTH - 1
  const uintptr_t lastByte = firstByte + SIMD_WIDTH - 1;
  return (firstByte & pageMask) == (lastByte & pageMask);
}
266template <
typename T,
size_t SIMD_WIDTH>
267static SIMD_INLINE Vec<T, SIMD_WIDTH> maskz_load(
const Mask<T, SIMD_WIDTH> &k,
277 if (is_within_same_page<SIMD_WIDTH>(p)) {
283 store(k_arr, (Vec<T, SIMD_WIDTH>) k);
285 for (
size_t i = 0; i < Vec<T, SIMD_WIDTH>::elems; i++) {
286 if (k_arr[i] != T(0)) { result[i] = p[i]; }
291template <
typename T,
size_t SIMD_WIDTH>
292static SIMD_INLINE Vec<T, SIMD_WIDTH> mask_load(
const Vec<T, SIMD_WIDTH> &src,
293 const Mask<T, SIMD_WIDTH> &k,
301 if (is_within_same_page<SIMD_WIDTH>(p)) {
307 store(k_arr, (Vec<T, SIMD_WIDTH>) k);
310 for (
size_t i = 0; i < Vec<T, SIMD_WIDTH>::elems; i++) {
311 if (k_arr[i] != T(0)) { result[i] = p[i]; }
316template <
typename T,
size_t SIMD_WIDTH>
317static SIMD_INLINE Vec<T, SIMD_WIDTH> maskz_loadu(
const Mask<T, SIMD_WIDTH> &k,
327 if (is_within_same_page<SIMD_WIDTH>(p)) {
333 store(k_arr, (Vec<T, SIMD_WIDTH>) k);
335 for (
size_t i = 0; i < Vec<T, SIMD_WIDTH>::elems; i++) {
336 if (k_arr[i] != T(0)) { result[i] = p[i]; }
341template <
typename T,
size_t SIMD_WIDTH>
342static SIMD_INLINE Vec<T, SIMD_WIDTH> mask_loadu(
const Vec<T, SIMD_WIDTH> &src,
343 const Mask<T, SIMD_WIDTH> &k,
351 if (is_within_same_page<SIMD_WIDTH>(p)) {
357 store(k_arr, (Vec<T, SIMD_WIDTH>) k);
360 for (
size_t i = 0; i < Vec<T, SIMD_WIDTH>::elems; i++) {
361 if (k_arr[i] != T(0)) { result[i] = p[i]; }
366template <
typename T,
size_t SIMD_WIDTH>
367static SIMD_INLINE
void mask_store(T *
const p,
const Mask<T, SIMD_WIDTH> &k,
368 const Vec<T, SIMD_WIDTH> &a)
380 if (is_within_same_page<SIMD_WIDTH>(p)) {
385 store(k_arr, (Vec<T, SIMD_WIDTH>) k);
388 for (
size_t i = 0; i < Vec<T, SIMD_WIDTH>::elems; i++) {
389 if (k_arr[i] != T(0)) { p[i] = a_arr[i]; }
393template <
typename T,
size_t SIMD_WIDTH>
394static SIMD_INLINE
void mask_storeu(T *
const p,
const Mask<T, SIMD_WIDTH> &k,
395 const Vec<T, SIMD_WIDTH> &a)
407 if (is_within_same_page<SIMD_WIDTH>(p)) {
412 store(k_arr, (Vec<T, SIMD_WIDTH>) k);
415 for (
size_t i = 0; i < Vec<T, SIMD_WIDTH>::elems; i++) {
416 if (k_arr[i] != T(0)) { p[i] = a_arr[i]; }
422template <
typename T,
size_t SIMD_WIDTH>
423static SIMD_INLINE Vec<T, SIMD_WIDTH> maskz_set1(
const Mask<T, SIMD_WIDTH> &k,
428template <
typename T,
size_t SIMD_WIDTH>
429static SIMD_INLINE Vec<T, SIMD_WIDTH> mask_set1(
const Vec<T, SIMD_WIDTH> &src,
430 const Mask<T, SIMD_WIDTH> &k,
464EMULATE_DOP_NAME(
bit_or, or)
474#define EMULATE_SHIFT(OP) \
475 template <size_t COUNT, typename T, size_t SIMD_WIDTH> \
476 static SIMD_INLINE Vec<T, SIMD_WIDTH> maskz_##OP( \
477 const Mask<T, SIMD_WIDTH> &k, const Vec<T, SIMD_WIDTH> &a) \
479 return mask::mask_ifelsezero(k, OP<COUNT>(a)); \
481 template <size_t COUNT, typename T, size_t SIMD_WIDTH> \
482 static SIMD_INLINE Vec<T, SIMD_WIDTH> mask_##OP( \
483 const Vec<T, SIMD_WIDTH> &src, const Mask<T, SIMD_WIDTH> &k, \
484 const Vec<T, SIMD_WIDTH> &a) \
486 return mask::mask_ifelse(k, OP<COUNT>(a), src); \
501#define EMULATE_CMP(OP) \
502 template <typename T, size_t SIMD_WIDTH> \
503 static SIMD_INLINE Mask<T, SIMD_WIDTH> mask_##OP( \
504 const Mask<T, SIMD_WIDTH> &k, const Vec<T, SIMD_WIDTH> &a, \
505 const Vec<T, SIMD_WIDTH> &b) \
507 return Mask<T, SIMD_WIDTH>(mask::mask_ifelsezero(k, OP(a, b))); \
509 template <typename T, size_t SIMD_WIDTH> \
510 static SIMD_INLINE Mask<T, SIMD_WIDTH> mask_##OP( \
511 const Vec<T, SIMD_WIDTH> &a, const Vec<T, SIMD_WIDTH> &b) \
513 return Mask<T, SIMD_WIDTH>(OP(a, b)); \
525template <
typename T,
size_t SIMD_WIDTH>
526static SIMD_INLINE
bool mask_test_all_zeros(
const Mask<T, SIMD_WIDTH> &k,
527 const Vec<T, SIMD_WIDTH> &a)
532template <
typename T,
size_t SIMD_WIDTH>
533static SIMD_INLINE
bool mask_test_all_ones(
const Mask<T, SIMD_WIDTH> &k,
534 const Vec<T, SIMD_WIDTH> &a)
536 return mask::mask_test_all_zeros(
541template <
typename T,
size_t SIMD_WIDTH>
542static SIMD_INLINE Mask<T, SIMD_WIDTH> mask_all_ones(OutputType<T>,
548#define EMULATE_DMASKOP(NAME) \
549 template <typename T, size_t SIMD_WIDTH> \
550 static SIMD_INLINE Mask<T, SIMD_WIDTH> k##NAME(const Mask<T, SIMD_WIDTH> &a, \
551 const Mask<T, SIMD_WIDTH> &b) \
553 return (Mask<T, SIMD_WIDTH>) NAME##_((Vec<T, SIMD_WIDTH>) a, \
554 (Vec<T, SIMD_WIDTH>) b); \
561template <
typename T,
size_t SIMD_WIDTH>
562static SIMD_INLINE Mask<T, SIMD_WIDTH> kandn(
const Mask<T, SIMD_WIDTH> &a,
563 const Mask<T, SIMD_WIDTH> &b)
565 return (Mask<T, SIMD_WIDTH>)
bit_andnot((Vec<T, SIMD_WIDTH>) a,
566 (Vec<T, SIMD_WIDTH>) b);
575template <
typename T,
size_t SIMD_WIDTH>
576static SIMD_INLINE Mask<T, SIMD_WIDTH> kxnor(
const Mask<T, SIMD_WIDTH> &a,
577 const Mask<T, SIMD_WIDTH> &b)
579 return (Mask<T, SIMD_WIDTH>)
bit_not(
580 bit_xor((Vec<T, SIMD_WIDTH>) a, (Vec<T, SIMD_WIDTH>) b));
583template <
typename T,
size_t SIMD_WIDTH>
584static SIMD_INLINE Mask<T, SIMD_WIDTH> kadd(
const Mask<T, SIMD_WIDTH> &a,
585 const Mask<T, SIMD_WIDTH> &b)
587 Mask<T, SIMD_WIDTH> ret;
588 ret = (((uintmax_t) a) + ((uintmax_t) b));
592template <
typename T,
size_t SIMD_WIDTH>
593static SIMD_INLINE Mask<T, SIMD_WIDTH> knot(
const Mask<T, SIMD_WIDTH> &a)
595 return (Mask<T, SIMD_WIDTH>)
bit_not((Vec<T, SIMD_WIDTH>) a);
601template <
typename T,
size_t SIMD_WIDTH>
602static SIMD_INLINE Mask<T, SIMD_WIDTH> kshiftri(
const Mask<T, SIMD_WIDTH> &a,
609 if (count >=
sizeof(uintmax_t) * 8) {
return Mask<T, SIMD_WIDTH>(0); }
610 return (Mask<T, SIMD_WIDTH>) (((uintmax_t) a) >> count);
612template <
typename T,
size_t SIMD_WIDTH>
613static SIMD_INLINE Mask<T, SIMD_WIDTH> kshiftli(
const Mask<T, SIMD_WIDTH> &a,
620 if (count >=
sizeof(uintmax_t) * 8) {
return Mask<T, SIMD_WIDTH>(0); }
621 return (Mask<T, SIMD_WIDTH>) (((uintmax_t) a) << count);
625template <
size_t COUNT,
typename T,
size_t SIMD_WIDTH>
626static SIMD_INLINE Mask<T, SIMD_WIDTH> kshiftri(
const Mask<T, SIMD_WIDTH> &a)
628 return (Mask<T, SIMD_WIDTH>)
srle<COUNT>((Vec<T, SIMD_WIDTH>) a);
630template <
size_t COUNT,
typename T,
size_t SIMD_WIDTH>
631static SIMD_INLINE Mask<T, SIMD_WIDTH> kshiftli(
const Mask<T, SIMD_WIDTH> &a)
633 return (Mask<T, SIMD_WIDTH>)
slle<COUNT>((Vec<T, SIMD_WIDTH>) a);
639template <
bool UP,
typename T,
size_t SIMD_WIDTH>
645 for (
size_t i = 0; i < Vec<T, SIMD_WIDTH>::elems; i++) {
649 i < Vec<T, SIMD_WIDTH>::elems * 2; i++) {
655template <
typename T,
size_t SIMD_WIDTH>
656static SIMD_INLINE Mask<T, SIMD_WIDTH> mask_set_true_low(
const size_t x,
661 return mask_all_ones(OutputType<T>(), Integer<SIMD_WIDTH>());
663 static MaskSetBuffer<false, T, SIMD_WIDTH> buffer;
664 return Mask<T, SIMD_WIDTH>(
668template <
typename T,
size_t SIMD_WIDTH>
669static SIMD_INLINE Mask<T, SIMD_WIDTH> mask_set_true_high(
const size_t x,
674 return mask_all_ones(OutputType<T>(), Integer<SIMD_WIDTH>());
676 static MaskSetBuffer<true, T, SIMD_WIDTH> buffer;
680template <
typename T,
size_t SIMD_WIDTH>
681static SIMD_INLINE Mask<T, SIMD_WIDTH> mask_set_false_low(
const size_t x,
686 static MaskSetBuffer<true, T, SIMD_WIDTH> buffer;
687 return Mask<T, SIMD_WIDTH>(
691template <
typename T,
size_t SIMD_WIDTH>
692static SIMD_INLINE Mask<T, SIMD_WIDTH> mask_set_false_high(
const size_t x,
697 static MaskSetBuffer<false, T, SIMD_WIDTH> buffer;
703template <
typename T,
size_t SIMD_WIDTH>
704static SIMD_INLINE
bool ktest_all_zeros(
const Mask<T, SIMD_WIDTH> &a)
709template <
typename T,
size_t SIMD_WIDTH>
710static SIMD_INLINE
bool ktest_all_ones(
const Mask<T, SIMD_WIDTH> &a)
717template <
typename T,
size_t SIMD_WIDTH>
718static SIMD_INLINE
bool kcmpeq(
const Mask<T, SIMD_WIDTH> &a,
719 const Mask<T, SIMD_WIDTH> &b)
721 return internal::mask::ktest_all_zeros(internal::mask::kxor(a, b));
bool operator[](const size_t i) const
Returns the Mask bit at the given index.
Mask(const Vec< T, SIMD_WIDTH > &x)
Constructs a Mask from a Vec.
bool operator==(const Mask< T, SIMD_WIDTH > &other) const
Compares the Mask with another Mask.
static constexpr size_t elems
Number of elements in the vector. Alias for elements.
Definition vec.H:85
void * aligned_malloc(size_t alignment, size_t size)
Aligned memory allocation.
Definition alloc.H:61
void aligned_free(void *ptr)
Aligned memory deallocation.
Definition alloc.H:102
static Vec< T, SIMD_WIDTH > sub(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Subtracts the elements of two Vec's.
Definition base.H:388
static Vec< T, SIMD_WIDTH > subs(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Subtracts the elements of two Vec's using saturated arithmetic.
Definition base.H:405
static Vec< T, SIMD_WIDTH > avg(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the average of the elements of two Vec's, rounded up.
Definition base.H:456
static Vec< T, SIMD_WIDTH > div2rd(const Vec< T, SIMD_WIDTH > &a)
Divides all elements of a Vec by 2 and rounds down the result.
Definition ext.H:1776
static Vec< T, SIMD_WIDTH > adds(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Adds the elements of two Vec's using saturated arithmetic.
Definition base.H:374
static Vec< T, SIMD_WIDTH > div2r0(const Vec< T, SIMD_WIDTH > &a)
Divides all elements of a Vec by 2 and rounds the result to 0.
Definition ext.H:1696
static Vec< T, SIMD_WIDTH > div(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Divides the elements of two Vec's.
Definition base.H:439
static Vec< T, SIMD_WIDTH > add(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Adds the elements of two Vec's.
Definition base.H:357
static Vec< T, SIMD_WIDTH > mul(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Multiplies the elements of two Vec's.
Definition base.H:421
static Vec< T, SIMD_WIDTH > cmplt(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for less-than ( < ).
Definition base.H:924
static bool test_all_ones(const Vec< T, SIMD_WIDTH > &a)
Tests if all bits of a Vec are one.
Definition base.H:1054
static Vec< T, SIMD_WIDTH > cmple(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for less-than-or-equal ( <= ).
Definition base.H:945
static bool test_all_zeros(const Vec< T, SIMD_WIDTH > &a)
Tests if all bits of a Vec are zero.
Definition base.H:1042
static Vec< T, SIMD_WIDTH > cmpneq(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for inequality ( != ).
Definition base.H:1029
static Vec< T, SIMD_WIDTH > cmpge(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for greater-than-or-equal ( >= ).
Definition base.H:987
static Vec< T, SIMD_WIDTH > cmpgt(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for greater-than ( > ).
Definition base.H:1008
static Vec< T, SIMD_WIDTH > cmpeq(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for equality ( == ).
Definition base.H:966
static Vec< T, SIMD_WIDTH > ifelse(const Vec< Tcond, SIMD_WIDTH > &cond, const Vec< T, SIMD_WIDTH > &trueVal, const Vec< T, SIMD_WIDTH > &falseVal)
Selects elements from two Vec's based on a condition Vec.
Definition base.H:126
static Vec< T, SIMD_WIDTH > slle(const Vec< T, SIMD_WIDTH > &a)
Shifts a Vec left by a constant number of elements, shifting in zero elements.
Definition base.H:1353
static Vec< T, SIMD_WIDTH > srle(const Vec< T, SIMD_WIDTH > &a)
Shifts a Vec right by a constant number of elements, shifting in zero elements.
Definition base.H:1338
static Vec< T, SIMD_WIDTH > hsubs(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally subtracts adjacent elements of two Vec's with saturation.
Definition base.H:523
static Vec< T, SIMD_WIDTH > hadds(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally adds adjacent elements of two Vec's with saturation.
Definition base.H:493
static Vec< T, SIMD_WIDTH > hadd(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally adds adjacent elements of two Vec's.
Definition base.H:477
static Vec< T, SIMD_WIDTH > hsub(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally subtracts adjacent elements of two Vec's.
Definition base.H:507
static Vec< T, SIMD_WIDTH > int2bits(const uint64_t a)
Sets all bits of each element of a Vec to the corresponding bit of an integer.
Definition base.H:190
static uint64_t msb2int(const Vec< T, SIMD_WIDTH > &a)
Collects the most significant bit of each element of a Vec into an integer.
Definition base.H:147
static Vec< T, SIMD_WIDTH > setzero()
Returns a Vec with all elements set to zero.
Definition base.H:70
static Vec< T, SIMD_WIDTH > set1(const dont_deduce< T > a)
Returns a Vec with all elements set to the same value.
Definition base.H:88
static Vec< T, SIMD_WIDTH > bit_andnot(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise ANDNOT of two Vec's.
Definition base.H:762
static Vec< T, SIMD_WIDTH > bit_and(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise AND of two Vec's.
Definition base.H:732
static Vec< T, SIMD_WIDTH > bit_xor(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise XOR of two Vec's.
Definition base.H:776
static Vec< T, SIMD_WIDTH > bit_or(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise OR of two Vec's.
Definition base.H:746
static Vec< T, SIMD_WIDTH > bit_not(const Vec< T, SIMD_WIDTH > &a)
Computes the bitwise NOT of a Vec.
Definition base.H:789
static Vec< T, SIMD_WIDTH > sqrt(const Vec< T, SIMD_WIDTH > &a)
Computes the square root of the elements of a Vec.
Definition base.H:584
static Vec< T, SIMD_WIDTH > rcp(const Vec< T, SIMD_WIDTH > &a)
Computes the approximate reciprocal of the elements of a Vec.
Definition base.H:547
static Vec< T, SIMD_WIDTH > rsqrt(const Vec< T, SIMD_WIDTH > &a)
Computes the approximate reciprocal square root of the elements of a Vec.
Definition base.H:565
static Vec< T, SIMD_WIDTH > truncate(const Vec< T, SIMD_WIDTH > &a)
Truncates the elements of a Vec to the nearest integer i.e. rounds towards zero.
Definition base.H:712
static Vec< T, SIMD_WIDTH > min(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the minimum of the elements of two Vec's.
Definition base.H:606
static Vec< T, SIMD_WIDTH > floor(const Vec< T, SIMD_WIDTH > &a)
Rounds the elements of a Vec down to the nearest integer.
Definition base.H:683
static Vec< T, SIMD_WIDTH > max(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the maximum of the elements of two Vec's.
Definition base.H:620
static Vec< T, SIMD_WIDTH > ceil(const Vec< T, SIMD_WIDTH > &a)
Rounds the elements of a Vec up to the nearest integer.
Definition base.H:668
static Vec< T, SIMD_WIDTH > neg(const Vec< T, SIMD_WIDTH > &a)
Negates the elements of a Vec.
Definition base.H:635
static Vec< T, SIMD_WIDTH > round(const Vec< T, SIMD_WIDTH > &a)
Rounds the elements of a Vec to the nearest integer.
Definition base.H:697
static Vec< T, SIMD_WIDTH > abs(const Vec< T, SIMD_WIDTH > &a)
Computes the absolute value of the elements of a Vec.
Definition base.H:654
static Vec< T, SIMD_WIDTH > load(const T *const p)
Loads a Vec from aligned memory.
Definition base.H:209
static Vec< T, SIMD_WIDTH > loadu(const T *const p)
Loads a Vec from unaligned memory.
Definition base.H:231
static void store(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to aligned memory.
Definition base.H:246
static void storeu(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to unaligned memory.
Definition base.H:265
static Vec< T, SIMD_WIDTH > srli(const Vec< T, SIMD_WIDTH > &a)
Shifts the elements of a Vec right by a constant number of bits while shifting in zeros.
Definition base.H:828
static Vec< T, SIMD_WIDTH > slli(const Vec< T, SIMD_WIDTH > &a)
Shifts the elements of a Vec left by a constant number of bits while shifting in zeros.
Definition base.H:844
static Vec< T, SIMD_WIDTH > srai(const Vec< T, SIMD_WIDTH > &a)
Shifts the elements of a Vec right by a constant number of bits while shifting in the sign bit.
Definition base.H:812
static Vec< Tout, SIMD_WIDTH > cvts(const Vec< Tin, SIMD_WIDTH > &a)
Converts the elements of a Vec between integer and floating point types of the same size.
Definition base.H:1445
static Vec< Tout, SIMD_WIDTH > reinterpret(const Vec< Tin, SIMD_WIDTH > &a)
Reinterprets a given Vec as a Vec with a different element type.
Definition base.H:58
Namespace for T-SIMD.
Definition time_measurement.H:161
static constexpr T trueval()
Returns a value where all bits are 1.
Definition types.H:311