95#ifndef SIMD_VEC_MASK_IMPL_EMU_H_
96#define SIMD_VEC_MASK_IMPL_EMU_H_
101#include "intel/base_impl_intel64.H"
108#ifndef SIMDVEC_SANDBOX
117template <
typename T,
size_t SIMD_WIDTH>
120 Vec<T, SIMD_WIDTH> mask;
// Constructs a Mask from a Vec. The visible steps reinterpret x as a vector
// of TypeInfo<T>::IntegerType elements and arithmetic-shift every element
// right by (sizeof(T) * 8 - 1) bits; srai shifts in the sign bit, so
// presumably each element of `shifted` ends up with all bits equal to the
// sign bit of the corresponding element of x.
// NOTE(review): the statement that stores the result in `mask` is on lines
// not shown in this listing -- confirm against the full header.
124 explicit SIMD_INLINE
Mask(
const Vec<T, SIMD_WIDTH> &x)
127 const auto &xInt = reinterpret<typename TypeInfo<T>::IntegerType>(x);
128 const auto &shifted =
srai<
sizeof(T) * 8 - 1>(xInt);
// Builds the mask from an integer bit pattern: int2bits expands x into a Vec
// in which all bits of each element are set to the corresponding bit of x.
// Not explicit, so an integer converts to a Mask implicitly.
131 SIMD_INLINE Mask(const uint64_t x)
  : mask(int2bits<T, SIMD_WIDTH>(x))
{}
// Explicit conversion to the vector representation: hands back a copy of the
// stored mask vector.
132 explicit SIMD_INLINE operator Vec<T, SIMD_WIDTH>() const
{
  return this->mask;
}
// Returns the Mask bit at the given index: the mask vector is stored to
// mask_array (declared on a line not shown in this listing) and element i is
// compared against zero.
// NOTE(review): no bounds check on i is visible here.
134 SIMD_INLINE
bool operator[](
const size_t i)
const
138 store(mask_array, mask);
139 return mask_array[i] != T(0);
// Class-specific allocation function: requests `size` bytes from
// aligned_malloc, passing SIMD_WIDTH as the alignment argument.
// NOTE(review): the pointer is returned unchecked -- confirm how
// aligned_malloc reports an allocation failure.
148 void *operator new(size_t size)
{
  void *const storage = aligned_malloc(SIMD_WIDTH, size);
  return storage;
}
// Array form of the class-specific allocation function: requests `size`
// bytes from aligned_malloc, passing SIMD_WIDTH as the alignment argument.
// NOTE(review): the pointer is returned unchecked -- confirm how
// aligned_malloc reports an allocation failure.
150 void *operator new[](size_t size)
{
  void *const storage = aligned_malloc(SIMD_WIDTH, size);
  return storage;
}
// Generates the two masked variants of a single-operand operation OP:
//   maskz_<OP_NAME>(k, a)     returns mask::mask_ifelsezero(k, OP(a))
//   mask_<OP_NAME>(src, k, a) returns mask::mask_ifelse(k, OP(a), src)
// OP is applied to the whole vector a before the mask is taken into account.
// NOTE(review): the brace lines of the generated functions are not shown in
// this listing.
157#define EMULATE_SOP_NAME(OP, OP_NAME) \
158 template <typename T, size_t SIMD_WIDTH> \
159 static SIMD_INLINE Vec<T, SIMD_WIDTH> maskz_##OP_NAME( \
160 const Mask<T, SIMD_WIDTH> &k, const Vec<T, SIMD_WIDTH> &a) \
162 return mask::mask_ifelsezero(k, OP(a)); \
164 template <typename T, size_t SIMD_WIDTH> \
165 static SIMD_INLINE Vec<T, SIMD_WIDTH> mask_##OP_NAME( \
166 const Vec<T, SIMD_WIDTH> &src, const Mask<T, SIMD_WIDTH> &k, \
167 const Vec<T, SIMD_WIDTH> &a) \
169 return mask::mask_ifelse(k, OP(a), src); \
// Shorthand for EMULATE_SOP_NAME when the generated functions carry the
// operation's own name (maskz_OP / mask_OP).
172#define EMULATE_SOP(OP) EMULATE_SOP_NAME(OP, OP)
// Generates the two masked variants of a two-operand operation OP:
//   maskz_<OP_NAME>(k, a, b)     returns mask::mask_ifelsezero(k, OP(a, b))
//   mask_<OP_NAME>(src, k, a, b) returns mask::mask_ifelse(k, OP(a, b), src)
// OP is applied to the whole vectors before the mask is taken into account.
// NOTE(review): the brace lines of the generated functions are not shown in
// this listing.
174#define EMULATE_DOP_NAME(OP, OP_NAME) \
175 template <typename T, size_t SIMD_WIDTH> \
176 static SIMD_INLINE Vec<T, SIMD_WIDTH> maskz_##OP_NAME( \
177 const Mask<T, SIMD_WIDTH> &k, const Vec<T, SIMD_WIDTH> &a, \
178 const Vec<T, SIMD_WIDTH> &b) \
180 return mask::mask_ifelsezero(k, OP(a, b)); \
182 template <typename T, size_t SIMD_WIDTH> \
183 static SIMD_INLINE Vec<T, SIMD_WIDTH> mask_##OP_NAME( \
184 const Vec<T, SIMD_WIDTH> &src, const Mask<T, SIMD_WIDTH> &k, \
185 const Vec<T, SIMD_WIDTH> &a, const Vec<T, SIMD_WIDTH> &b) \
187 return mask::mask_ifelse(k, OP(a, b), src); \
// Shorthand for EMULATE_DOP_NAME when the generated functions carry the
// operation's own name (maskz_OP / mask_OP).
190#define EMULATE_DOP(OP) EMULATE_DOP_NAME(OP, OP)
// Mask-driven select: converts k to its Vec representation and forwards to
// ifelse, which selects elements from trueVal or falseVal based on that
// condition vector.
192template <
typename T,
size_t SIMD_WIDTH>
193static SIMD_INLINE Vec<T, SIMD_WIDTH> mask_ifelse(
194 const Mask<T, SIMD_WIDTH> &k,
const Vec<T, SIMD_WIDTH> &trueVal,
195 const Vec<T, SIMD_WIDTH> &falseVal)
197 return ifelse((Vec<T, SIMD_WIDTH>) k, trueVal, falseVal);
// Mask-driven "keep or zero": returns the bitwise AND of the mask's Vec
// representation with trueVal, so every bit that is clear in the mask vector
// is clear in the result.
201template <
typename T,
size_t SIMD_WIDTH>
202static SIMD_INLINE Vec<T, SIMD_WIDTH> mask_ifelsezero(
203 const Mask<T, SIMD_WIDTH> &k,
const Vec<T, SIMD_WIDTH> &trueVal)
205 return bit_and((Vec<T, SIMD_WIDTH>) k, trueVal);
208template <
typename Tout,
typename Tin,
size_t SIMD_WIDTH>
209static SIMD_INLINE Mask<Tout, SIMD_WIDTH> reinterpret_mask(
210 const Mask<Tin, SIMD_WIDTH> &k)
212 static_assert(
sizeof(Tout) ==
sizeof(Tin),
"");
217template <
size_t SIMD_WIDTH>
218SIMD_INLINE Vec<Int, SIMD_WIDTH> maskz_cvts(
const Mask<Float, SIMD_WIDTH> &k,
219 const Vec<Float, SIMD_WIDTH> &a)
221 return mask::mask_ifelsezero(mask::reinterpret_mask<Int>(k),
225template <
size_t SIMD_WIDTH>
226SIMD_INLINE Vec<Int, SIMD_WIDTH> mask_cvts(
const Vec<Int, SIMD_WIDTH> &src,
227 const Mask<Float, SIMD_WIDTH> &k,
228 const Vec<Float, SIMD_WIDTH> &a)
234template <
size_t SIMD_WIDTH>
235SIMD_INLINE Vec<Float, SIMD_WIDTH> maskz_cvts(
const Mask<Int, SIMD_WIDTH> &k,
236 const Vec<Int, SIMD_WIDTH> &a)
238 return mask::mask_ifelsezero(mask::reinterpret_mask<Float>(k),
242template <
size_t SIMD_WIDTH>
243SIMD_INLINE Vec<Float, SIMD_WIDTH> mask_cvts(
const Vec<Float, SIMD_WIDTH> &src,
244 const Mask<Int, SIMD_WIDTH> &k,
245 const Vec<Int, SIMD_WIDTH> &a)
247 return mask::mask_ifelse(mask::reinterpret_mask<Float>(k),
// Checks whether a SIMD_WIDTH-byte access starting at p stays inside one
// 4096-byte page: the address of the first byte (p) and of the last byte
// (p + SIMD_WIDTH - 1) are each rounded down to a multiple of PAGE_SIZE and
// the two results are compared.
// NOTE(review): the page size is a hard-coded constant here; presumably a
// lower bound for the supported targets -- confirm. Also confirm the local
// name PAGE_SIZE cannot clash with a platform macro of the same name.
258template <
size_t SIMD_WIDTH,
typename T>
259static SIMD_INLINE
bool is_within_same_page(
const T *
const p)
261 const uintptr_t PAGE_SIZE = 4096;
262 const uintptr_t begin_page =
263 reinterpret_cast<uintptr_t
>(p) & ~(PAGE_SIZE - 1);
265 const uintptr_t end_page =
268 (
reinterpret_cast<uintptr_t
>(p) + SIMD_WIDTH - 1) & ~(PAGE_SIZE - 1);
269 return begin_page == end_page;
272template <
typename T,
size_t SIMD_WIDTH>
273static SIMD_INLINE Vec<T, SIMD_WIDTH> maskz_load(
const Mask<T, SIMD_WIDTH> &k,
283 if (is_within_same_page<SIMD_WIDTH>(p)) {
289 store(k_arr, (Vec<T, SIMD_WIDTH>) k);
291 for (
size_t i = 0; i < Vec<T, SIMD_WIDTH>::elems; i++) {
292 if (k_arr[i] != T(0)) { result[i] = p[i]; }
297template <
typename T,
size_t SIMD_WIDTH>
298static SIMD_INLINE Vec<T, SIMD_WIDTH> mask_load(
const Vec<T, SIMD_WIDTH> &src,
299 const Mask<T, SIMD_WIDTH> &k,
307 if (is_within_same_page<SIMD_WIDTH>(p)) {
313 store(k_arr, (Vec<T, SIMD_WIDTH>) k);
316 for (
size_t i = 0; i < Vec<T, SIMD_WIDTH>::elems; i++) {
317 if (k_arr[i] != T(0)) { result[i] = p[i]; }
322template <
typename T,
size_t SIMD_WIDTH>
323static SIMD_INLINE Vec<T, SIMD_WIDTH> maskz_loadu(
const Mask<T, SIMD_WIDTH> &k,
333 if (is_within_same_page<SIMD_WIDTH>(p)) {
339 store(k_arr, (Vec<T, SIMD_WIDTH>) k);
341 for (
size_t i = 0; i < Vec<T, SIMD_WIDTH>::elems; i++) {
342 if (k_arr[i] != T(0)) { result[i] = p[i]; }
347template <
typename T,
size_t SIMD_WIDTH>
348static SIMD_INLINE Vec<T, SIMD_WIDTH> mask_loadu(
const Vec<T, SIMD_WIDTH> &src,
349 const Mask<T, SIMD_WIDTH> &k,
357 if (is_within_same_page<SIMD_WIDTH>(p)) {
363 store(k_arr, (Vec<T, SIMD_WIDTH>) k);
366 for (
size_t i = 0; i < Vec<T, SIMD_WIDTH>::elems; i++) {
367 if (k_arr[i] != T(0)) { result[i] = p[i]; }
372template <
typename T,
size_t SIMD_WIDTH>
373static SIMD_INLINE
void mask_store(T *
const p,
const Mask<T, SIMD_WIDTH> &k,
374 const Vec<T, SIMD_WIDTH> &a)
386 if (is_within_same_page<SIMD_WIDTH>(p)) {
391 store(k_arr, (Vec<T, SIMD_WIDTH>) k);
394 for (
size_t i = 0; i < Vec<T, SIMD_WIDTH>::elems; i++) {
395 if (k_arr[i] != T(0)) { p[i] = a_arr[i]; }
399template <
typename T,
size_t SIMD_WIDTH>
400static SIMD_INLINE
void mask_storeu(T *
const p,
const Mask<T, SIMD_WIDTH> &k,
401 const Vec<T, SIMD_WIDTH> &a)
413 if (is_within_same_page<SIMD_WIDTH>(p)) {
418 store(k_arr, (Vec<T, SIMD_WIDTH>) k);
421 for (
size_t i = 0; i < Vec<T, SIMD_WIDTH>::elems; i++) {
422 if (k_arr[i] != T(0)) { p[i] = a_arr[i]; }
428template <
typename T,
size_t SIMD_WIDTH>
429static SIMD_INLINE Vec<T, SIMD_WIDTH> maskz_set1(
const Mask<T, SIMD_WIDTH> &k,
434template <
typename T,
size_t SIMD_WIDTH>
435static SIMD_INLINE Vec<T, SIMD_WIDTH> mask_set1(
const Vec<T, SIMD_WIDTH> &src,
436 const Mask<T, SIMD_WIDTH> &k,
// Instantiates maskz_or / mask_or as masked wrappers around bit_or; the
// separate OP_NAME argument lets the generated names differ from the name of
// the wrapped operation.
470EMULATE_DOP_NAME(
bit_or, or)
// Generates the two masked variants of a shift operation OP that takes its
// shift amount as the template argument COUNT:
//   maskz_<OP><COUNT>(k, a)     returns mask::mask_ifelsezero(k, OP<COUNT>(a))
//   mask_<OP><COUNT>(src, k, a) returns mask::mask_ifelse(k, OP<COUNT>(a), src)
// NOTE(review): the brace lines of the generated functions are not shown in
// this listing.
480#define EMULATE_SHIFT(OP) \
481 template <size_t COUNT, typename T, size_t SIMD_WIDTH> \
482 static SIMD_INLINE Vec<T, SIMD_WIDTH> maskz_##OP( \
483 const Mask<T, SIMD_WIDTH> &k, const Vec<T, SIMD_WIDTH> &a) \
485 return mask::mask_ifelsezero(k, OP<COUNT>(a)); \
487 template <size_t COUNT, typename T, size_t SIMD_WIDTH> \
488 static SIMD_INLINE Vec<T, SIMD_WIDTH> mask_##OP( \
489 const Vec<T, SIMD_WIDTH> &src, const Mask<T, SIMD_WIDTH> &k, \
490 const Vec<T, SIMD_WIDTH> &a) \
492 return mask::mask_ifelse(k, OP<COUNT>(a), src); \
// Generates two overloads of mask_<OP> for a comparison OP, both returning a
// Mask built from the comparison's result vector:
//   mask_<OP>(k, a, b) wraps mask::mask_ifelsezero(k, OP(a, b)), i.e. the
//                      comparison result ANDed with the mask k
//   mask_<OP>(a, b)    wraps OP(a, b) directly
// NOTE(review): the brace lines of the generated functions are not shown in
// this listing.
507#define EMULATE_CMP(OP) \
508 template <typename T, size_t SIMD_WIDTH> \
509 static SIMD_INLINE Mask<T, SIMD_WIDTH> mask_##OP( \
510 const Mask<T, SIMD_WIDTH> &k, const Vec<T, SIMD_WIDTH> &a, \
511 const Vec<T, SIMD_WIDTH> &b) \
513 return Mask<T, SIMD_WIDTH>(mask::mask_ifelsezero(k, OP(a, b))); \
515 template <typename T, size_t SIMD_WIDTH> \
516 static SIMD_INLINE Mask<T, SIMD_WIDTH> mask_##OP( \
517 const Vec<T, SIMD_WIDTH> &a, const Vec<T, SIMD_WIDTH> &b) \
519 return Mask<T, SIMD_WIDTH>(OP(a, b)); \
531template <
typename T,
size_t SIMD_WIDTH>
532static SIMD_INLINE
bool mask_test_all_zeros(
const Mask<T, SIMD_WIDTH> &k,
533 const Vec<T, SIMD_WIDTH> &a)
538template <
typename T,
size_t SIMD_WIDTH>
539static SIMD_INLINE
bool mask_test_all_ones(
const Mask<T, SIMD_WIDTH> &k,
540 const Vec<T, SIMD_WIDTH> &a)
542 return mask::mask_test_all_zeros(
547template <
typename T,
size_t SIMD_WIDTH>
548static SIMD_INLINE Mask<T, SIMD_WIDTH> mask_all_ones(OutputType<T>,
// Generates a two-operand mask operation k<NAME>(a, b): both masks are
// converted to their Vec representation, combined with the function
// <NAME>_ and the result is converted back to a Mask.
// NOTE(review): the brace lines of the generated function are not shown in
// this listing.
554#define EMULATE_DMASKOP(NAME) \
555 template <typename T, size_t SIMD_WIDTH> \
556 static SIMD_INLINE Mask<T, SIMD_WIDTH> k##NAME(const Mask<T, SIMD_WIDTH> &a, \
557 const Mask<T, SIMD_WIDTH> &b) \
559 return (Mask<T, SIMD_WIDTH>) NAME##_((Vec<T, SIMD_WIDTH>) a, \
560 (Vec<T, SIMD_WIDTH>) b); \
// Mask ANDNOT: applies bit_andnot to the Vec representations of a and b (in
// that argument order) and converts the result back to a Mask.
// NOTE(review): which of the two operands bit_andnot negates is not visible
// in this listing -- confirm before relying on the argument order.
567template <
typename T,
size_t SIMD_WIDTH>
568static SIMD_INLINE Mask<T, SIMD_WIDTH> kandn(
const Mask<T, SIMD_WIDTH> &a,
569 const Mask<T, SIMD_WIDTH> &b)
571 return (Mask<T, SIMD_WIDTH>)
bit_andnot((Vec<T, SIMD_WIDTH>) a,
572 (Vec<T, SIMD_WIDTH>) b);
// Mask XNOR: computes the bitwise XOR of the Vec representations of a and b,
// inverts it with bit_not and converts the result back to a Mask.
581template <
typename T,
size_t SIMD_WIDTH>
582static SIMD_INLINE Mask<T, SIMD_WIDTH> kxnor(
const Mask<T, SIMD_WIDTH> &a,
583 const Mask<T, SIMD_WIDTH> &b)
585 return (Mask<T, SIMD_WIDTH>)
bit_not(
586 bit_xor((Vec<T, SIMD_WIDTH>) a, (Vec<T, SIMD_WIDTH>) b));
// Mask addition: converts both masks to uintmax_t, adds the two integers and
// assigns the sum to a local Mask.
// NOTE(review): the return statement and the Mask-to-integer conversion used
// by the casts are on lines not shown in this listing -- confirm.
589template <
typename T,
size_t SIMD_WIDTH>
590static SIMD_INLINE Mask<T, SIMD_WIDTH> kadd(
const Mask<T, SIMD_WIDTH> &a,
591 const Mask<T, SIMD_WIDTH> &b)
593 Mask<T, SIMD_WIDTH> ret;
594 ret = (((uintmax_t) a) + ((uintmax_t) b));
// Mask NOT: applies bit_not to the Vec representation of a and converts the
// result back to a Mask.
598template <
typename T,
size_t SIMD_WIDTH>
599static SIMD_INLINE Mask<T, SIMD_WIDTH> knot(
const Mask<T, SIMD_WIDTH> &a)
601 return (Mask<T, SIMD_WIDTH>)
bit_not((Vec<T, SIMD_WIDTH>) a);
// Right shift of a mask by a run-time count: the mask is converted to
// uintmax_t and shifted right by `count` bits. A count of at least the bit
// width of uintmax_t yields an all-zero mask instead, which avoids an
// out-of-range shift on the integer.
// NOTE(review): the declaration of `count` is on a line not shown in this
// listing.
607template <
typename T,
size_t SIMD_WIDTH>
608static SIMD_INLINE Mask<T, SIMD_WIDTH> kshiftri(
const Mask<T, SIMD_WIDTH> &a,
615 if (count >=
sizeof(uintmax_t) * 8) {
return Mask<T, SIMD_WIDTH>(0); }
616 return (Mask<T, SIMD_WIDTH>) (((uintmax_t) a) >> count);
// Left shift of a mask by a run-time count: the mask is converted to
// uintmax_t and shifted left by `count` bits. A count of at least the bit
// width of uintmax_t yields an all-zero mask instead, which avoids an
// out-of-range shift on the integer.
// NOTE(review): the declaration of `count` is on a line not shown in this
// listing.
618template <
typename T,
size_t SIMD_WIDTH>
619static SIMD_INLINE Mask<T, SIMD_WIDTH> kshiftli(
const Mask<T, SIMD_WIDTH> &a,
626 if (count >=
sizeof(uintmax_t) * 8) {
return Mask<T, SIMD_WIDTH>(0); }
627 return (Mask<T, SIMD_WIDTH>) (((uintmax_t) a) << count);
// Right shift of a mask by a compile-time COUNT: applies srle<COUNT> to the
// Vec representation of a, i.e. shifts the vector right by COUNT elements
// while shifting in zero elements, and converts the result back to a Mask.
631template <
size_t COUNT,
typename T,
size_t SIMD_WIDTH>
632static SIMD_INLINE Mask<T, SIMD_WIDTH> kshiftri(
const Mask<T, SIMD_WIDTH> &a)
634 return (Mask<T, SIMD_WIDTH>)
srle<COUNT>((Vec<T, SIMD_WIDTH>) a);
// Left shift of a mask by a compile-time COUNT: applies slle<COUNT> to the
// Vec representation of a, i.e. shifts the vector left by COUNT elements
// while shifting in zero elements, and converts the result back to a Mask.
636template <
size_t COUNT,
typename T,
size_t SIMD_WIDTH>
637static SIMD_INLINE Mask<T, SIMD_WIDTH> kshiftli(
const Mask<T, SIMD_WIDTH> &a)
639 return (Mask<T, SIMD_WIDTH>)
slle<COUNT>((Vec<T, SIMD_WIDTH>) a);
645template <
bool UP,
typename T,
size_t SIMD_WIDTH>
651 for (
size_t i = 0; i < Vec<T, SIMD_WIDTH>::elems; i++) {
655 i < Vec<T, SIMD_WIDTH>::elems * 2; i++) {
661template <
typename T,
size_t SIMD_WIDTH>
662static SIMD_INLINE Mask<T, SIMD_WIDTH> mask_set_true_low(
const size_t x,
667 return mask_all_ones(OutputType<T>(), Integer<SIMD_WIDTH>());
669 static MaskSetBuffer<false, T, SIMD_WIDTH> buffer;
670 return Mask<T, SIMD_WIDTH>(
674template <
typename T,
size_t SIMD_WIDTH>
675static SIMD_INLINE Mask<T, SIMD_WIDTH> mask_set_true_high(
const size_t x,
680 return mask_all_ones(OutputType<T>(), Integer<SIMD_WIDTH>());
682 static MaskSetBuffer<true, T, SIMD_WIDTH> buffer;
686template <
typename T,
size_t SIMD_WIDTH>
687static SIMD_INLINE Mask<T, SIMD_WIDTH> mask_set_false_low(
const size_t x,
692 static MaskSetBuffer<true, T, SIMD_WIDTH> buffer;
693 return Mask<T, SIMD_WIDTH>(
697template <
typename T,
size_t SIMD_WIDTH>
698static SIMD_INLINE Mask<T, SIMD_WIDTH> mask_set_false_high(
const size_t x,
703 static MaskSetBuffer<false, T, SIMD_WIDTH> buffer;
709template <
typename T,
size_t SIMD_WIDTH>
710static SIMD_INLINE
bool ktest_all_zeros(
const Mask<T, SIMD_WIDTH> &a)
715template <
typename T,
size_t SIMD_WIDTH>
716static SIMD_INLINE
bool ktest_all_ones(
const Mask<T, SIMD_WIDTH> &a)
// Mask equality test: combines a and b with internal::mask::kxor and returns
// whether the result tests as all zeros via internal::mask::ktest_all_zeros.
// NOTE(review): kxor is defined outside the visible lines; presumably a
// bitwise XOR of the two masks, so the result is true when a and b are equal.
723template <
typename T,
size_t SIMD_WIDTH>
724static SIMD_INLINE
bool kcmpeq(
const Mask<T, SIMD_WIDTH> &a,
725 const Mask<T, SIMD_WIDTH> &b)
727 return internal::mask::ktest_all_zeros(internal::mask::kxor(a, b));
bool operator[](const size_t i) const
Returns the Mask bit at the given index.
Mask(const Vec< T, SIMD_WIDTH > &x)
Constructs a Mask from a Vec.
bool operator==(const Mask< T, SIMD_WIDTH > &other) const
Compares the Mask with another Mask.
static constexpr size_t elems
Number of elements in the vector. Alias for elements.
Definition vec.H:85
void * aligned_malloc(size_t alignment, size_t size)
Aligned memory allocation.
Definition alloc.H:61
void aligned_free(void *ptr)
Aligned memory deallocation.
Definition alloc.H:102
static Vec< T, SIMD_WIDTH > sub(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Subtracts the elements of two Vec's.
Definition base.H:388
static Vec< T, SIMD_WIDTH > subs(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Subtracts the elements of two Vec's using saturated arithmetic.
Definition base.H:405
static Vec< T, SIMD_WIDTH > avg(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the average of the elements of two Vec's, rounded up.
Definition base.H:456
static Vec< T, SIMD_WIDTH > div2rd(const Vec< T, SIMD_WIDTH > &a)
Divides all elements of a Vec by 2 and rounds down the result.
Definition ext.H:1776
static Vec< T, SIMD_WIDTH > adds(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Adds the elements of two Vec's using saturated arithmetic.
Definition base.H:374
static Vec< T, SIMD_WIDTH > div2r0(const Vec< T, SIMD_WIDTH > &a)
Divides all elements of a Vec by 2 and rounds the result to 0.
Definition ext.H:1696
static Vec< T, SIMD_WIDTH > div(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Divides the elements of two Vec's.
Definition base.H:439
static Vec< T, SIMD_WIDTH > add(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Adds the elements of two Vec's.
Definition base.H:357
static Vec< T, SIMD_WIDTH > mul(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Multiplies the elements of two Vec's.
Definition base.H:421
static Vec< T, SIMD_WIDTH > cmplt(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for less-than ( < ).
Definition base.H:924
static bool test_all_ones(const Vec< T, SIMD_WIDTH > &a)
Tests if all bits of a Vec are one.
Definition base.H:1054
static Vec< T, SIMD_WIDTH > cmple(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for less-than-or-equal ( <= ).
Definition base.H:945
static bool test_all_zeros(const Vec< T, SIMD_WIDTH > &a)
Tests if all bits of a Vec are zero.
Definition base.H:1042
static Vec< T, SIMD_WIDTH > cmpneq(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for inequality ( != ).
Definition base.H:1029
static Vec< T, SIMD_WIDTH > cmpge(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for greater-than-or-equal ( >= ).
Definition base.H:987
static Vec< T, SIMD_WIDTH > cmpgt(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for greater-than ( > ).
Definition base.H:1008
static Vec< T, SIMD_WIDTH > cmpeq(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for equality ( == ).
Definition base.H:966
static Vec< T, SIMD_WIDTH > ifelse(const Vec< Tcond, SIMD_WIDTH > &cond, const Vec< T, SIMD_WIDTH > &trueVal, const Vec< T, SIMD_WIDTH > &falseVal)
Selects elements from two Vec's based on a condition Vec.
Definition base.H:126
static Vec< T, SIMD_WIDTH > slle(const Vec< T, SIMD_WIDTH > &a)
Shifts a Vec left by a constant number of elements, shifting in zero elements.
Definition base.H:1353
static Vec< T, SIMD_WIDTH > srle(const Vec< T, SIMD_WIDTH > &a)
Shifts a Vec right by a constant number of elements, shifting in zero elements.
Definition base.H:1338
static Vec< T, SIMD_WIDTH > hsubs(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally subtracts adjacent elements of two Vec's with saturation.
Definition base.H:523
static Vec< T, SIMD_WIDTH > hadds(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally adds adjacent elements of two Vec's with saturation.
Definition base.H:493
static Vec< T, SIMD_WIDTH > hadd(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally adds adjacent elements of two Vec's.
Definition base.H:477
static Vec< T, SIMD_WIDTH > hsub(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally subtracts adjacent elements of two Vec's.
Definition base.H:507
static Vec< T, SIMD_WIDTH > int2bits(const uint64_t a)
Sets all bits of each element of a Vec to the corresponding bit of an integer.
Definition base.H:190
static uint64_t msb2int(const Vec< T, SIMD_WIDTH > &a)
Collects the most significant bit of each element of a Vec into an integer.
Definition base.H:147
static Vec< T, SIMD_WIDTH > setzero()
Returns a Vec with all elements set to zero.
Definition base.H:70
static Vec< T, SIMD_WIDTH > set1(const dont_deduce< T > a)
Returns a Vec with all elements set to the same value.
Definition base.H:88
static Vec< T, SIMD_WIDTH > bit_andnot(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise ANDNOT of two Vec's.
Definition base.H:762
static Vec< T, SIMD_WIDTH > bit_and(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise AND of two Vec's.
Definition base.H:732
static Vec< T, SIMD_WIDTH > bit_xor(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise XOR of two Vec's.
Definition base.H:776
static Vec< T, SIMD_WIDTH > bit_or(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise OR of two Vec's.
Definition base.H:746
static Vec< T, SIMD_WIDTH > bit_not(const Vec< T, SIMD_WIDTH > &a)
Computes the bitwise NOT of a Vec.
Definition base.H:789
static Vec< T, SIMD_WIDTH > sqrt(const Vec< T, SIMD_WIDTH > &a)
Computes the square root of the elements of a Vec.
Definition base.H:584
static Vec< T, SIMD_WIDTH > rcp(const Vec< T, SIMD_WIDTH > &a)
Computes the approximate reciprocal of the elements of a Vec.
Definition base.H:547
static Vec< T, SIMD_WIDTH > rsqrt(const Vec< T, SIMD_WIDTH > &a)
Computes the approximate reciprocal square root of the elements of a Vec.
Definition base.H:565
static Vec< T, SIMD_WIDTH > truncate(const Vec< T, SIMD_WIDTH > &a)
Truncates the elements of a Vec to integer values, i.e. rounds towards zero.
Definition base.H:712
static Vec< T, SIMD_WIDTH > min(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the minimum of the elements of two Vec's.
Definition base.H:606
static Vec< T, SIMD_WIDTH > floor(const Vec< T, SIMD_WIDTH > &a)
Rounds the elements of a Vec down to the nearest integer.
Definition base.H:683
static Vec< T, SIMD_WIDTH > max(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the maximum of the elements of two Vec's.
Definition base.H:620
static Vec< T, SIMD_WIDTH > ceil(const Vec< T, SIMD_WIDTH > &a)
Rounds the elements of a Vec up to the nearest integer.
Definition base.H:668
static Vec< T, SIMD_WIDTH > neg(const Vec< T, SIMD_WIDTH > &a)
Negates the elements of a Vec.
Definition base.H:635
static Vec< T, SIMD_WIDTH > round(const Vec< T, SIMD_WIDTH > &a)
Rounds the elements of a Vec to the nearest integer.
Definition base.H:697
static Vec< T, SIMD_WIDTH > abs(const Vec< T, SIMD_WIDTH > &a)
Computes the absolute value of the elements of a Vec.
Definition base.H:654
static Vec< T, SIMD_WIDTH > load(const T *const p)
Loads a Vec from aligned memory.
Definition base.H:209
static Vec< T, SIMD_WIDTH > loadu(const T *const p)
Loads a Vec from unaligned memory.
Definition base.H:231
static void store(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to aligned memory.
Definition base.H:246
static void storeu(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to unaligned memory.
Definition base.H:265
static Vec< T, SIMD_WIDTH > srli(const Vec< T, SIMD_WIDTH > &a)
Shifts the elements of a Vec right by a constant number of bits while shifting in zeros.
Definition base.H:828
static Vec< T, SIMD_WIDTH > slli(const Vec< T, SIMD_WIDTH > &a)
Shifts the elements of a Vec left by a constant number of bits while shifting in zeros.
Definition base.H:844
static Vec< T, SIMD_WIDTH > srai(const Vec< T, SIMD_WIDTH > &a)
Shifts the elements of a Vec right by a constant number of bits while shifting in the sign bit.
Definition base.H:812
static Vec< Tout, SIMD_WIDTH > cvts(const Vec< Tin, SIMD_WIDTH > &a)
Converts the elements of a Vec between integer and floating point types of the same size.
Definition base.H:1445
static Vec< Tout, SIMD_WIDTH > reinterpret(const Vec< Tin, SIMD_WIDTH > &a)
Reinterprets a given Vec as a Vec with a different element type.
Definition base.H:58
Namespace for T-SIMD.
Definition time_measurement.H:161
static constexpr T trueval()
Returns a value where all bits are 1.
Definition types.H:319