31#ifndef SIMD_VEC_BASE_H_
32#define SIMD_VEC_BASE_H_
35#include "intel/base_impl_intel16.H"
36#include "intel/base_impl_intel32.H"
37#include "intel/base_impl_intel64.H"
38#include "neon/base_impl_neon16.H"
39#include "sandbox/base_impl_sandbox.H"
57template <
typename Tout,
typename Tin,
size_t SIMD_WIDTH>
61 return internal::base::reinterpret(a, internal::OutputType<Tout>());
69template <
typename T,
size_t SIMD_WIDTH_DEFAULT_NATIVE>
72 return internal::base::setzero(internal::OutputType<T>(),
73 internal::Integer<SIMD_WIDTH>());
87template <
typename T,
size_t SIMD_WIDTH_DEFAULT_NATIVE>
90 return internal::base::set1(a, internal::Integer<SIMD_WIDTH>());
105template <
typename T,
size_t SIMD_WIDTH_DEFAULT_NATIVE>
108 return internal::base::iota(internal::OutputType<T>(),
109 internal::Integer<SIMD_WIDTH>());
125template <
typename Tcond,
typename T,
size_t SIMD_WIDTH>
130 static_assert(
sizeof(Tcond) ==
sizeof(T),
131 "condition and value types must have the same size");
132 return internal::base::ifelse(
reinterpret<T>(cond), trueVal, falseVal);
146template <
typename T,
size_t SIMD_WIDTH>
149 return internal::base::msb2int(a);
168template <
typename T,
size_t SIMD_WIDTH>
171 return internal::base::int2msb(a, internal::OutputType<T>(),
172 internal::Integer<SIMD_WIDTH>());
189template <
typename T,
size_t SIMD_WIDTH>
192 return internal::base::int2bits(a, internal::OutputType<T>(),
193 internal::Integer<SIMD_WIDTH>());
208template <
size_t SIMD_WIDTH_DEFAULT_NATIVE,
typename T>
213 SIMD_CHECK_ALIGNMENT(p, SIMD_WIDTH);
214 return internal::base::load(p, internal::Integer<SIMD_WIDTH>());
230template <
size_t SIMD_WIDTH_DEFAULT_NATIVE,
typename T>
233 return internal::base::loadu(p, internal::Integer<SIMD_WIDTH>());
245template <
typename T,
size_t SIMD_WIDTH>
250 SIMD_CHECK_ALIGNMENT(p, SIMD_WIDTH);
251 internal::base::store(p, a);
264template <
typename T,
size_t SIMD_WIDTH>
267 internal::base::storeu(p, a);
285template <
typename T,
size_t SIMD_WIDTH>
290 SIMD_CHECK_ALIGNMENT(p, SIMD_WIDTH);
291 internal::base::stream_store(p, a);
311 internal::base::lfence();
326 internal::base::sfence();
339 internal::base::mfence();
356template <
typename T,
size_t SIMD_WIDTH>
360 return internal::base::add(a, b);
373template <
typename T,
size_t SIMD_WIDTH>
377 return internal::base::adds(a, b);
387template <
typename T,
size_t SIMD_WIDTH>
391 return internal::base::sub(a, b);
404template <
typename T,
size_t SIMD_WIDTH>
408 return internal::base::subs(a, b);
420template <
typename T,
size_t SIMD_WIDTH>
425 "mul() is only available for floating point types");
426 return internal::base::mul(a, b);
438template <
typename T,
size_t SIMD_WIDTH>
443 "div() is only available for floating point types");
444 return internal::base::div(a, b);
455template <
typename T,
size_t SIMD_WIDTH>
459 return internal::base::avg(a, b);
476template <
typename T,
size_t SIMD_WIDTH>
480 return internal::base::hadd(a, b);
492template <
typename T,
size_t SIMD_WIDTH>
496 return internal::base::hadds(a, b);
506template <
typename T,
size_t SIMD_WIDTH>
510 return internal::base::hsub(a, b);
522template <
typename T,
size_t SIMD_WIDTH>
526 return internal::base::hsubs(a, b);
546template <
typename T,
size_t SIMD_WIDTH>
550 "rcp() is only available for floating point types");
551 return internal::base::rcp(a);
564template <
typename T,
size_t SIMD_WIDTH>
568 "rsqrt() is only available for floating point types");
569 return internal::base::rsqrt(a);
583template <
typename T,
size_t SIMD_WIDTH>
587 "sqrt() is only available for floating point types");
588 return internal::base::sqrt(a);
605template <
typename T,
size_t SIMD_WIDTH>
609 return internal::base::min(a, b);
619template <
typename T,
size_t SIMD_WIDTH>
623 return internal::base::max(a, b);
634template <
typename T,
size_t SIMD_WIDTH>
638 "neg() is only available for signed types");
639 return internal::base::neg(a);
653template <
typename T,
size_t SIMD_WIDTH>
656 return internal::base::abs(a);
667template <
typename T,
size_t SIMD_WIDTH>
670 return internal::base::ceil(a);
682template <
typename T,
size_t SIMD_WIDTH>
685 return internal::base::floor(a);
696template <
typename T,
size_t SIMD_WIDTH>
699 return internal::base::round(a);
711template <
typename T,
size_t SIMD_WIDTH>
714 return internal::base::truncate(a);
731template <
typename T,
size_t SIMD_WIDTH>
735 return internal::base::bit_and(a, b);
745template <
typename T,
size_t SIMD_WIDTH>
749 return internal::base::bit_or(a, b);
761template <
typename T,
size_t SIMD_WIDTH>
765 return internal::base::bit_andnot(a, b);
775template <
typename T,
size_t SIMD_WIDTH>
779 return internal::base::bit_xor(a, b);
788template <
typename T,
size_t SIMD_WIDTH>
791 return internal::base::bit_not(a);
811template <
size_t COUNT,
typename T,
size_t SIMD_WIDTH>
814 return internal::base::srai<COUNT>(a);
827template <
size_t COUNT,
typename T,
size_t SIMD_WIDTH>
830 return internal::base::srli<COUNT>(a);
843template <
size_t COUNT,
typename T,
size_t SIMD_WIDTH>
846 return internal::base::slli<COUNT>(a);
861template <
typename T,
size_t SIMD_WIDTH>
865 return internal::base::sra(a, count);
878template <
typename T,
size_t SIMD_WIDTH>
882 return internal::base::srl(a, count);
895template <
typename T,
size_t SIMD_WIDTH>
899 return internal::base::sll(a, count);
923template <
typename T,
size_t SIMD_WIDTH>
927 return internal::base::cmplt(a, b);
944template <
typename T,
size_t SIMD_WIDTH>
948 return internal::base::cmple(a, b);
965template <
typename T,
size_t SIMD_WIDTH>
969 return internal::base::cmpeq(a, b);
986template <
typename T,
size_t SIMD_WIDTH>
990 return internal::base::cmpge(a, b);
1007template <
typename T,
size_t SIMD_WIDTH>
1011 return internal::base::cmpgt(a, b);
1028template <
typename T,
size_t SIMD_WIDTH>
1032 return internal::base::cmpneq(a, b);
1041template <
typename T,
size_t SIMD_WIDTH>
1044 return internal::base::test_all_zeros(a);
1053template <
typename T,
size_t SIMD_WIDTH>
1056 return internal::base::test_all_ones(a);
1071template <
size_t INDEX,
typename T,
size_t SIMD_WIDTH>
1074 return internal::base::extract<INDEX>(a);
1085template <
size_t LANE_INDEX,
typename T,
size_t SIMD_WIDTH>
1088 static_assert(LANE_INDEX < SIMD_WIDTH / 16,
1089 "LANE_INDEX must be less than SIMD_WIDTH / 16");
1090 return internal::base::extractLane<LANE_INDEX>(a);
1100template <
typename T,
size_t SIMD_WIDTH>
1103 return internal::base::reverse(a);
1140template <
size_t N,
typename T,
size_t SIMD_WIDTH>
1143 static_assert(N >= 1 && N <= 5,
"N must be between 1 and 5");
1144 internal::base::swizzle(v, internal::Integer<N>());
1168template <
size_t PART,
size_t NUM_ELEMS,
typename T,
size_t SIMD_WIDTH>
1172 static_assert(PART == 0 || PART == 1,
"PART must be 0 or 1");
1173 static_assert(NUM_ELEMS <= Vec<T, SIMD_WIDTH>::elements / 2,
1174 "NUM_ELEMS must be at most half of one Vec");
1175 static_assert(NUM_ELEMS > 0 && (NUM_ELEMS & (NUM_ELEMS - 1)) == 0,
1176 "NUM_ELEMS must be a power of two");
1177 return internal::base::unpack(a, b, internal::Part<PART>(),
1178 internal::Bytes<NUM_ELEMS *
sizeof(T)>());
1208template <
size_t PART,
size_t NUM_ELEMS,
typename T,
size_t SIMD_WIDTH>
1212 static_assert(PART == 0 || PART == 1,
"PART must be 0 or 1");
1213 static_assert(NUM_ELEMS <= Vec<T, SIMD_WIDTH>::elements / 2,
1214 "NUM_ELEMS must be at most half of one Vec");
1215 static_assert(NUM_ELEMS > 0 && (NUM_ELEMS & (NUM_ELEMS - 1)) == 0,
1216 "NUM_ELEMS must be a power of two");
1217 return internal::base::unpack16(a, b, internal::Part<PART>(),
1218 internal::Bytes<NUM_ELEMS *
sizeof(T)>());
1246template <
size_t NUM_ELEMS,
typename T,
size_t SIMD_WIDTH>
1251 static_assert(NUM_ELEMS <= Vec<T, SIMD_WIDTH>::elements / 2,
1252 "NUM_ELEMS must be at most half of one Vec");
1253 static_assert(NUM_ELEMS > 0 && (NUM_ELEMS & (NUM_ELEMS - 1)) == 0,
1254 "NUM_ELEMS must be a power of two");
1255 return internal::base::zip<NUM_ELEMS>(a, b, l, h);
1285template <
size_t NUM_ELEMS,
typename T,
size_t SIMD_WIDTH>
1290 static_assert(NUM_ELEMS <= Vec<T, SIMD_WIDTH>::elements / 2,
1291 "NUM_ELEMS must be at most half of one Vec");
1292 static_assert(NUM_ELEMS > 0 && (NUM_ELEMS & (NUM_ELEMS - 1)) == 0,
1293 "NUM_ELEMS must be a power of two");
1294 return internal::base::zip16<NUM_ELEMS>(a, b, l, h);
1315template <
size_t NUM_ELEMS,
typename T,
size_t SIMD_WIDTH>
1320 static_assert(NUM_ELEMS <= Vec<T, SIMD_WIDTH>::elements / 2,
1321 "NUM_ELEMS must be at most half of one Vec");
1322 static_assert(NUM_ELEMS > 0 && (NUM_ELEMS & (NUM_ELEMS - 1)) == 0,
1323 "NUM_ELEMS must be a power of two");
1324 return internal::base::unzip(a, b, l, h,
1325 internal::Bytes<NUM_ELEMS *
sizeof(T)>());
1337template <
size_t COUNT,
typename T,
size_t SIMD_WIDTH>
1340 return internal::base::srle<COUNT>(a);
1352template <
size_t COUNT,
typename T,
size_t SIMD_WIDTH>
1355 return internal::base::slle<COUNT>(a);
1369template <
size_t COUNT,
typename T,
size_t SIMD_WIDTH>
1373 return internal::base::alignre<COUNT>(h, l);
1396template <
typename Tout,
typename Tin,
size_t SIMD_WIDTH>
1400 return internal::base::packs(a, b, internal::OutputType<Tout>());
1422template <
typename Tout,
typename Tin,
size_t SIMD_WIDTH>
1427 return internal::base::extend(vIn, vOut);
1444template <
typename Tout,
typename Tin,
size_t SIMD_WIDTH>
1447 static_assert(
sizeof(Tout) ==
sizeof(Tin),
1448 "Tout and Tin must be the same size");
1449 static_assert(std::is_floating_point<Tout>::value !=
1450 std::is_floating_point<Tin>::value,
1451 "exactly one of Tout and Tin must be a floating point type");
1452 return internal::base::cvts(a, internal::OutputType<Tout>());
SIMD vector class, holds multiple elements of the same type.
Definition vec.H:75
static Vec< T, SIMD_WIDTH > sub(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Subtracts the elements of two Vec's.
Definition base.H:388
static Vec< T, SIMD_WIDTH > subs(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Subtracts the elements of two Vec's using saturated arithmetic.
Definition base.H:405
static Vec< T, SIMD_WIDTH > avg(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the average of the elements of two Vec's, rounded up.
Definition base.H:456
static Vec< T, SIMD_WIDTH > adds(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Adds the elements of two Vec's using saturated arithmetic.
Definition base.H:374
static Vec< T, SIMD_WIDTH > div(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Divides the elements of two Vec's.
Definition base.H:439
static Vec< T, SIMD_WIDTH > add(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Adds the elements of two Vec's.
Definition base.H:357
static Vec< T, SIMD_WIDTH > mul(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Multiplies the elements of two Vec's.
Definition base.H:421
static Vec< T, SIMD_WIDTH > cmplt(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for less-than ( < ).
Definition base.H:924
static bool test_all_ones(const Vec< T, SIMD_WIDTH > &a)
Tests if all bits of a Vec are one.
Definition base.H:1054
static Vec< T, SIMD_WIDTH > cmple(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for less-than-or-equal ( <= ).
Definition base.H:945
static bool test_all_zeros(const Vec< T, SIMD_WIDTH > &a)
Tests if all bits of a Vec are zero.
Definition base.H:1042
static Vec< T, SIMD_WIDTH > cmpneq(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for inequality ( != ).
Definition base.H:1029
static Vec< T, SIMD_WIDTH > cmpge(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for greater-than-or-equal ( >= ).
Definition base.H:987
static Vec< T, SIMD_WIDTH > cmpgt(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for greater-than ( > ).
Definition base.H:1008
static Vec< T, SIMD_WIDTH > cmpeq(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for equality ( == ).
Definition base.H:966
static Vec< T, SIMD_WIDTH > ifelse(const Vec< Tcond, SIMD_WIDTH > &cond, const Vec< T, SIMD_WIDTH > &trueVal, const Vec< T, SIMD_WIDTH > &falseVal)
Selects elements from two Vec's based on a condition Vec.
Definition base.H:126
static Vec< T, SIMD_WIDTH > slle(const Vec< T, SIMD_WIDTH > &a)
Shifts a Vec left by a constant number of elements, shifting in zero elements.
Definition base.H:1353
static Vec< T, SIMD_WIDTH > srle(const Vec< T, SIMD_WIDTH > &a)
Shifts a Vec right by a constant number of elements, shifting in zero elements.
Definition base.H:1338
static Vec< T, SIMD_WIDTH > alignre(const Vec< T, SIMD_WIDTH > &h, const Vec< T, SIMD_WIDTH > &l)
Concatenates two Vec's, shifts the result right by a constant number of elements, and returns the low...
Definition base.H:1370
static Vec< T, SIMD_WIDTH > hsubs(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally subtracts adjacent elements of two Vec's with saturation.
Definition base.H:523
static Vec< T, SIMD_WIDTH > hadds(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally adds adjacent elements of two Vec's with saturation.
Definition base.H:493
static Vec< T, SIMD_WIDTH > hadd(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally adds adjacent elements of two Vec's.
Definition base.H:477
static Vec< T, SIMD_WIDTH > hsub(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally subtracts adjacent elements of two Vec's.
Definition base.H:507
static Vec< T, SIMD_WIDTH > iota()
Creates a Vec with sequentially increasing numbers, starting with 0.
Definition base.H:106
static Vec< T, SIMD_WIDTH > int2bits(const uint64_t a)
Sets all bits of each element of a Vec to the corresponding bit of an integer.
Definition base.H:190
static uint64_t msb2int(const Vec< T, SIMD_WIDTH > &a)
Collects the most significant bit of each element of a Vec into an integer.
Definition base.H:147
static Vec< T, SIMD_WIDTH > int2msb(const uint64_t a)
Sets the most significant bit of each element of a Vec to the corresponding bit of an integer.
Definition base.H:169
static Vec< T, SIMD_WIDTH > setzero()
Returns a Vec with all elements set to zero.
Definition base.H:70
static Vec< T, SIMD_WIDTH > set1(const dont_deduce< T > a)
Returns a Vec with all elements set to the same value.
Definition base.H:88
static Vec< T, SIMD_WIDTH > bit_andnot(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise ANDNOT of two Vec's.
Definition base.H:762
static Vec< T, SIMD_WIDTH > bit_and(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise AND of two Vec's.
Definition base.H:732
static Vec< T, SIMD_WIDTH > bit_xor(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise XOR of two Vec's.
Definition base.H:776
static Vec< T, SIMD_WIDTH > bit_or(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise OR of two Vec's.
Definition base.H:746
static Vec< T, SIMD_WIDTH > bit_not(const Vec< T, SIMD_WIDTH > &a)
Computes the bitwise NOT of a Vec.
Definition base.H:789
static Vec< T, SIMD_WIDTH > sqrt(const Vec< T, SIMD_WIDTH > &a)
Computes the square root of the elements of a Vec.
Definition base.H:584
static Vec< T, SIMD_WIDTH > rcp(const Vec< T, SIMD_WIDTH > &a)
Computes the approximate reciprocal of the elements of a Vec.
Definition base.H:547
static Vec< T, SIMD_WIDTH > rsqrt(const Vec< T, SIMD_WIDTH > &a)
Computes the approximate reciprocal square root of the elements of a Vec.
Definition base.H:565
static Vec< T, SIMD_WIDTH > truncate(const Vec< T, SIMD_WIDTH > &a)
Truncates the elements of a Vec to the nearest integer, i.e. rounds towards zero.
Definition base.H:712
static Vec< T, SIMD_WIDTH > min(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the minimum of the elements of two Vec's.
Definition base.H:606
static Vec< T, SIMD_WIDTH > floor(const Vec< T, SIMD_WIDTH > &a)
Rounds the elements of a Vec down to the nearest integer.
Definition base.H:683
static Vec< T, SIMD_WIDTH > max(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the maximum of the elements of two Vec's.
Definition base.H:620
static Vec< T, SIMD_WIDTH > ceil(const Vec< T, SIMD_WIDTH > &a)
Rounds the elements of a Vec up to the nearest integer.
Definition base.H:668
static Vec< T, SIMD_WIDTH > neg(const Vec< T, SIMD_WIDTH > &a)
Negates the elements of a Vec.
Definition base.H:635
static Vec< T, SIMD_WIDTH > round(const Vec< T, SIMD_WIDTH > &a)
Rounds the elements of a Vec to the nearest integer.
Definition base.H:697
static Vec< T, SIMD_WIDTH > abs(const Vec< T, SIMD_WIDTH > &a)
Computes the absolute value of the elements of a Vec.
Definition base.H:654
static Vec< T, SIMD_WIDTH > load(const T *const p)
Loads a Vec from aligned memory.
Definition base.H:209
static Vec< T, SIMD_WIDTH > loadu(const T *const p)
Loads a Vec from unaligned memory.
Definition base.H:231
static void store(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to aligned memory.
Definition base.H:246
static void storeu(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to unaligned memory.
Definition base.H:265
static void stream_store(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to aligned memory using a non-temporal memory hint.
Definition base.H:286
static void mfence()
Full memory fence.
Definition base.H:337
static void sfence()
Store fence.
Definition base.H:324
static void lfence()
Load fence.
Definition base.H:309
static Vec< T, SIMD_WIDTH > reverse(const Vec< T, SIMD_WIDTH > &a)
Reverses the order of the elements of a Vec.
Definition base.H:1101
static Vec< T, SIMD_WIDTH > sra(const Vec< T, SIMD_WIDTH > &a, const uint8_t count)
Shifts the elements of a Vec right by a variable number of bits while shifting in the sign bit.
Definition base.H:862
static Vec< T, SIMD_WIDTH > srli(const Vec< T, SIMD_WIDTH > &a)
Shifts the elements of a Vec right by a constant number of bits while shifting in zeros.
Definition base.H:828
static Vec< T, SIMD_WIDTH > sll(const Vec< T, SIMD_WIDTH > &a, const uint8_t count)
Shifts the elements of a Vec left by a variable number of bits while shifting in zeros.
Definition base.H:896
static Vec< T, SIMD_WIDTH > slli(const Vec< T, SIMD_WIDTH > &a)
Shifts the elements of a Vec left by a constant number of bits while shifting in zeros.
Definition base.H:844
static Vec< T, SIMD_WIDTH > srai(const Vec< T, SIMD_WIDTH > &a)
Shifts the elements of a Vec right by a constant number of bits while shifting in the sign bit.
Definition base.H:812
static Vec< T, SIMD_WIDTH > srl(const Vec< T, SIMD_WIDTH > &a, const uint8_t count)
Shifts the elements of a Vec right by a variable number of bits while shifting in zeros.
Definition base.H:879
static void swizzle(Vec< T, SIMD_WIDTH > v[N])
Swizzles (de-interleaves, i.e. converts from AoS to SoA) multiple Vec's in place.
Definition base.H:1141
static Vec< Tout, SIMD_WIDTH > cvts(const Vec< Tin, SIMD_WIDTH > &a)
Converts the elements of a Vec between integer and floating point types of the same size.
Definition base.H:1445
static Vec< Tout, SIMD_WIDTH > packs(const Vec< Tin, SIMD_WIDTH > &a, const Vec< Tin, SIMD_WIDTH > &b)
Packs two Vec's into one by converting the elements into the next smaller type with saturation.
Definition base.H:1397
static void extend(const Vec< Tin, SIMD_WIDTH > &vIn, Vec< Tout, SIMD_WIDTH > vOut[sizeof(Tout)/sizeof(Tin)])
Extends the elements of a Vec to a larger or equally sized type.
Definition base.H:1423
static Vec< Tout, SIMD_WIDTH > reinterpret(const Vec< Tin, SIMD_WIDTH > &a)
Reinterprets a given Vec as a Vec with a different element type.
Definition base.H:58
static void zip16(const Vec< T, SIMD_WIDTH > a, const Vec< T, SIMD_WIDTH > b, Vec< T, SIMD_WIDTH > &l, Vec< T, SIMD_WIDTH > &h)
Interleaves blocks of elements of each 16-byte lane of two Vec's.
Definition base.H:1286
static void zip(const Vec< T, SIMD_WIDTH > a, const Vec< T, SIMD_WIDTH > b, Vec< T, SIMD_WIDTH > &l, Vec< T, SIMD_WIDTH > &h)
Interleaves blocks of elements of two Vec's.
Definition base.H:1247
static Vec< T, SIMD_WIDTH > unpack(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Interleaves blocks of elements from the high or low half of two Vec's.
Definition base.H:1169
static void unzip(const Vec< T, SIMD_WIDTH > a, const Vec< T, SIMD_WIDTH > b, Vec< T, SIMD_WIDTH > &l, Vec< T, SIMD_WIDTH > &h)
Deinterleaves blocks of elements of two Vec's.
Definition base.H:1316
static Vec< T, SIMD_WIDTH > unpack16(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Interleaves blocks of elements from the high or low half of each 16-byte lane of two Vec's.
Definition base.H:1209
Namespace for T-SIMD.
Definition time_measurement.H:161
typename internal::dont_deduce< T >::type dont_deduce
Helper type to prevent template argument deduction.
Definition types.H:416
Type information for SIMD types.
Definition types.H:257