T-SIMD v31.1.0
A C++ template SIMD library
Loading...
Searching...
No Matches
base.H
// ===========================================================================
//
// base-level classes and functions
//
// This source code file is part of the following software:
//
// - the low-level C++ template SIMD library
// - the SIMD implementation of the MinWarping and the 2D-Warping methods
// for local visual homing.
//
// The software is provided based on the accompanying license agreement in the
// file LICENSE.md.
// The software is provided "as is" without any warranty by the licensor and
// without any liability of the licensor, and the software may not be
// distributed by the licensee; see the license agreement for details.
//
// (C) Jonas Keller, Ralf Möller
// Computer Engineering
// Faculty of Technology
// Bielefeld University
// www.ti.uni-bielefeld.de
//
// ===========================================================================

// 22. Jan 23 (Jonas Keller): introduced wrapper layer that wraps the internal
// architecture-specific implementations

// 09. Mar 23 (Jonas Keller): added doxygen documentation

30#pragma once
31#ifndef SIMD_VEC_BASE_H_
32#define SIMD_VEC_BASE_H_
33
34#include "defs.H"
35#include "intel/base_impl_intel16.H"
36#include "intel/base_impl_intel32.H"
37#include "intel/base_impl_intel64.H"
38#include "neon/base_impl_neon16.H"
39#include "sandbox/base_impl_sandbox.H"
40#include "types.H"
41#include "vec.H"
42
43#include <cstdint>
44#include <type_traits>
45
46namespace simd {
57template <typename Tout, typename Tin, size_t SIMD_WIDTH>
59 const Vec<Tin, SIMD_WIDTH> &a)
60{
61 return internal::base::reinterpret(a, internal::OutputType<Tout>());
62}
63
69template <typename T, size_t SIMD_WIDTH_DEFAULT_NATIVE>
70static SIMD_INLINE Vec<T, SIMD_WIDTH> setzero()
71{
72 return internal::base::setzero(internal::OutputType<T>(),
73 internal::Integer<SIMD_WIDTH>());
74}
75
87template <typename T, size_t SIMD_WIDTH_DEFAULT_NATIVE>
88static SIMD_INLINE Vec<T, SIMD_WIDTH> set1(const dont_deduce<T> a)
89{
90 return internal::base::set1(a, internal::Integer<SIMD_WIDTH>());
91}
92
93// 30. Jan 23 (Jonas Keller): added iota
94
105template <typename T, size_t SIMD_WIDTH_DEFAULT_NATIVE>
106static SIMD_INLINE Vec<T, SIMD_WIDTH> iota()
107{
108 return internal::base::iota(internal::OutputType<T>(),
109 internal::Integer<SIMD_WIDTH>());
110}
111
125template <typename Tcond, typename T, size_t SIMD_WIDTH>
126static SIMD_INLINE Vec<T, SIMD_WIDTH> ifelse(const Vec<Tcond, SIMD_WIDTH> &cond,
127 const Vec<T, SIMD_WIDTH> &trueVal,
128 const Vec<T, SIMD_WIDTH> &falseVal)
129{
130 static_assert(sizeof(Tcond) == sizeof(T),
131 "condition and value types must have the same size");
132 return internal::base::ifelse(reinterpret<T>(cond), trueVal, falseVal);
133}
134
135// 27. Aug 22 (Jonas Keller): added msb2int
136
146template <typename T, size_t SIMD_WIDTH>
147static SIMD_INLINE uint64_t msb2int(const Vec<T, SIMD_WIDTH> &a)
148{
149 return internal::base::msb2int(a);
150}
151
152// 09. Oct 22 (Jonas Keller): added int2msb
153
168template <typename T, size_t SIMD_WIDTH>
169static SIMD_INLINE Vec<T, SIMD_WIDTH> int2msb(const uint64_t a)
170{
171 return internal::base::int2msb(a, internal::OutputType<T>(),
172 internal::Integer<SIMD_WIDTH>());
173}
174
175// 09. Oct 22 (Jonas Keller): added int2bits
176
189template <typename T, size_t SIMD_WIDTH>
190static SIMD_INLINE Vec<T, SIMD_WIDTH> int2bits(const uint64_t a)
191{
192 return internal::base::int2bits(a, internal::OutputType<T>(),
193 internal::Integer<SIMD_WIDTH>());
194}
195
208template <size_t SIMD_WIDTH_DEFAULT_NATIVE, typename T>
209static SIMD_INLINE Vec<T, SIMD_WIDTH> load(const T *const p)
210{
211 // 08. Apr 23 (Jonas Keller):
212 // added alignment check (if SIMD_ALIGN_CHK is defined)
213 SIMD_CHECK_ALIGNMENT(p, SIMD_WIDTH);
214 return internal::base::load(p, internal::Integer<SIMD_WIDTH>());
215}
216
230template <size_t SIMD_WIDTH_DEFAULT_NATIVE, typename T>
231static SIMD_INLINE Vec<T, SIMD_WIDTH> loadu(const T *const p)
232{
233 return internal::base::loadu(p, internal::Integer<SIMD_WIDTH>());
234}
235
245template <typename T, size_t SIMD_WIDTH>
246static SIMD_INLINE void store(T *const p, const Vec<T, SIMD_WIDTH> &a)
247{
248 // 08. Apr 23 (Jonas Keller):
249 // added alignment check (if SIMD_ALIGN_CHK is defined)
250 SIMD_CHECK_ALIGNMENT(p, SIMD_WIDTH);
251 internal::base::store(p, a);
252}
253
264template <typename T, size_t SIMD_WIDTH>
265static SIMD_INLINE void storeu(T *const p, const Vec<T, SIMD_WIDTH> &a)
266{
267 internal::base::storeu(p, a);
268}
269
285template <typename T, size_t SIMD_WIDTH>
286static SIMD_INLINE void stream_store(T *const p, const Vec<T, SIMD_WIDTH> &a)
287{
288 // 08. Apr 23 (Jonas Keller):
289 // added alignment check (if SIMD_ALIGN_CHK is defined)
290 SIMD_CHECK_ALIGNMENT(p, SIMD_WIDTH);
291 internal::base::stream_store(p, a);
292}
293
309static SIMD_INLINE void lfence()
310{
311 internal::base::lfence();
312}
313
324static SIMD_INLINE void sfence()
325{
326 internal::base::sfence();
327}
328
337static SIMD_INLINE void mfence()
338{
339 internal::base::mfence();
340}
341
356template <typename T, size_t SIMD_WIDTH>
357static SIMD_INLINE Vec<T, SIMD_WIDTH> add(const Vec<T, SIMD_WIDTH> &a,
358 const Vec<T, SIMD_WIDTH> &b)
359{
360 return internal::base::add(a, b);
361}
362
373template <typename T, size_t SIMD_WIDTH>
374static SIMD_INLINE Vec<T, SIMD_WIDTH> adds(const Vec<T, SIMD_WIDTH> &a,
375 const Vec<T, SIMD_WIDTH> &b)
376{
377 return internal::base::adds(a, b);
378}
379
387template <typename T, size_t SIMD_WIDTH>
388static SIMD_INLINE Vec<T, SIMD_WIDTH> sub(const Vec<T, SIMD_WIDTH> &a,
389 const Vec<T, SIMD_WIDTH> &b)
390{
391 return internal::base::sub(a, b);
392}
393
404template <typename T, size_t SIMD_WIDTH>
405static SIMD_INLINE Vec<T, SIMD_WIDTH> subs(const Vec<T, SIMD_WIDTH> &a,
406 const Vec<T, SIMD_WIDTH> &b)
407{
408 return internal::base::subs(a, b);
409}
410
420template <typename T, size_t SIMD_WIDTH>
421static SIMD_INLINE Vec<T, SIMD_WIDTH> mul(const Vec<T, SIMD_WIDTH> &a,
422 const Vec<T, SIMD_WIDTH> &b)
423{
424 static_assert(TypeInfo<T>::isFloatingPoint,
425 "mul() is only available for floating point types");
426 return internal::base::mul(a, b);
427}
428
438template <typename T, size_t SIMD_WIDTH>
439static SIMD_INLINE Vec<T, SIMD_WIDTH> div(const Vec<T, SIMD_WIDTH> &a,
440 const Vec<T, SIMD_WIDTH> &b)
441{
442 static_assert(TypeInfo<T>::isFloatingPoint,
443 "div() is only available for floating point types");
444 return internal::base::div(a, b);
445}
446
455template <typename T, size_t SIMD_WIDTH>
456static SIMD_INLINE Vec<T, SIMD_WIDTH> avg(const Vec<T, SIMD_WIDTH> &a,
457 const Vec<T, SIMD_WIDTH> &b)
458{
459 return internal::base::avg(a, b);
460}
461
476template <typename T, size_t SIMD_WIDTH>
477static SIMD_INLINE Vec<T, SIMD_WIDTH> hadd(const Vec<T, SIMD_WIDTH> &a,
478 const Vec<T, SIMD_WIDTH> &b)
479{
480 return internal::base::hadd(a, b);
481}
482
492template <typename T, size_t SIMD_WIDTH>
493static SIMD_INLINE Vec<T, SIMD_WIDTH> hadds(const Vec<T, SIMD_WIDTH> &a,
494 const Vec<T, SIMD_WIDTH> &b)
495{
496 return internal::base::hadds(a, b);
497}
498
506template <typename T, size_t SIMD_WIDTH>
507static SIMD_INLINE Vec<T, SIMD_WIDTH> hsub(const Vec<T, SIMD_WIDTH> &a,
508 const Vec<T, SIMD_WIDTH> &b)
509{
510 return internal::base::hsub(a, b);
511}
512
522template <typename T, size_t SIMD_WIDTH>
523static SIMD_INLINE Vec<T, SIMD_WIDTH> hsubs(const Vec<T, SIMD_WIDTH> &a,
524 const Vec<T, SIMD_WIDTH> &b)
525{
526 return internal::base::hsubs(a, b);
527}
528
546template <typename T, size_t SIMD_WIDTH>
547static SIMD_INLINE Vec<T, SIMD_WIDTH> rcp(const Vec<T, SIMD_WIDTH> &a)
548{
549 static_assert(TypeInfo<T>::isFloatingPoint,
550 "rcp() is only available for floating point types");
551 return internal::base::rcp(a);
552}
553
564template <typename T, size_t SIMD_WIDTH>
565static SIMD_INLINE Vec<T, SIMD_WIDTH> rsqrt(const Vec<T, SIMD_WIDTH> &a)
566{
567 static_assert(TypeInfo<T>::isFloatingPoint,
568 "rsqrt() is only available for floating point types");
569 return internal::base::rsqrt(a);
570}
571
583template <typename T, size_t SIMD_WIDTH>
584static SIMD_INLINE Vec<T, SIMD_WIDTH> sqrt(const Vec<T, SIMD_WIDTH> &a)
585{
586 static_assert(TypeInfo<T>::isFloatingPoint,
587 "sqrt() is only available for floating point types");
588 return internal::base::sqrt(a);
589}
590
605template <typename T, size_t SIMD_WIDTH>
606static SIMD_INLINE Vec<T, SIMD_WIDTH> min(const Vec<T, SIMD_WIDTH> &a,
607 const Vec<T, SIMD_WIDTH> &b)
608{
609 return internal::base::min(a, b);
610}
611
619template <typename T, size_t SIMD_WIDTH>
620static SIMD_INLINE Vec<T, SIMD_WIDTH> max(const Vec<T, SIMD_WIDTH> &a,
621 const Vec<T, SIMD_WIDTH> &b)
622{
623 return internal::base::max(a, b);
624}
625
634template <typename T, size_t SIMD_WIDTH>
635static SIMD_INLINE Vec<T, SIMD_WIDTH> neg(const Vec<T, SIMD_WIDTH> &a)
636{
637 static_assert(TypeInfo<T>::isSigned,
638 "neg() is only available for signed types");
639 return internal::base::neg(a);
640}
641
642// 25. Mar 23 (Jonas Keller): added integer version of ceil, floor, round,
643// truncate and unsigned version of abs
644
653template <typename T, size_t SIMD_WIDTH>
654static SIMD_INLINE Vec<T, SIMD_WIDTH> abs(const Vec<T, SIMD_WIDTH> &a)
655{
656 return internal::base::abs(a);
657}
658
667template <typename T, size_t SIMD_WIDTH>
668static SIMD_INLINE Vec<T, SIMD_WIDTH> ceil(const Vec<T, SIMD_WIDTH> &a)
669{
670 return internal::base::ceil(a);
671}
672
682template <typename T, size_t SIMD_WIDTH>
683static SIMD_INLINE Vec<T, SIMD_WIDTH> floor(const Vec<T, SIMD_WIDTH> &a)
684{
685 return internal::base::floor(a);
686}
687
696template <typename T, size_t SIMD_WIDTH>
697static SIMD_INLINE Vec<T, SIMD_WIDTH> round(const Vec<T, SIMD_WIDTH> &a)
698{
699 return internal::base::round(a);
700}
701
711template <typename T, size_t SIMD_WIDTH>
713{
714 return internal::base::truncate(a);
715}
716
731template <typename T, size_t SIMD_WIDTH>
733 const Vec<T, SIMD_WIDTH> &b)
734{
735 return internal::base::bit_and(a, b);
736}
737
745template <typename T, size_t SIMD_WIDTH>
746static SIMD_INLINE Vec<T, SIMD_WIDTH> bit_or(const Vec<T, SIMD_WIDTH> &a,
747 const Vec<T, SIMD_WIDTH> &b)
748{
749 return internal::base::bit_or(a, b);
750}
751
761template <typename T, size_t SIMD_WIDTH>
763 const Vec<T, SIMD_WIDTH> &b)
764{
765 return internal::base::bit_andnot(a, b);
766}
767
775template <typename T, size_t SIMD_WIDTH>
777 const Vec<T, SIMD_WIDTH> &b)
778{
779 return internal::base::bit_xor(a, b);
780}
781
788template <typename T, size_t SIMD_WIDTH>
790{
791 return internal::base::bit_not(a);
792}
793
811template <size_t COUNT, typename T, size_t SIMD_WIDTH>
812static SIMD_INLINE Vec<T, SIMD_WIDTH> srai(const Vec<T, SIMD_WIDTH> &a)
813{
814 return internal::base::srai<COUNT>(a);
815}
816
827template <size_t COUNT, typename T, size_t SIMD_WIDTH>
828static SIMD_INLINE Vec<T, SIMD_WIDTH> srli(const Vec<T, SIMD_WIDTH> &a)
829{
830 return internal::base::srli<COUNT>(a);
831}
832
843template <size_t COUNT, typename T, size_t SIMD_WIDTH>
844static SIMD_INLINE Vec<T, SIMD_WIDTH> slli(const Vec<T, SIMD_WIDTH> &a)
845{
846 return internal::base::slli<COUNT>(a);
847}
848
849// 12. Jan 23 (Jonas Keller): added sra, srl and sll functions
850
861template <typename T, size_t SIMD_WIDTH>
862static SIMD_INLINE Vec<T, SIMD_WIDTH> sra(const Vec<T, SIMD_WIDTH> &a,
863 const uint8_t count)
864{
865 return internal::base::sra(a, count);
866}
867
878template <typename T, size_t SIMD_WIDTH>
879static SIMD_INLINE Vec<T, SIMD_WIDTH> srl(const Vec<T, SIMD_WIDTH> &a,
880 const uint8_t count)
881{
882 return internal::base::srl(a, count);
883}
884
895template <typename T, size_t SIMD_WIDTH>
896static SIMD_INLINE Vec<T, SIMD_WIDTH> sll(const Vec<T, SIMD_WIDTH> &a,
897 const uint8_t count)
898{
899 return internal::base::sll(a, count);
900}
901
923template <typename T, size_t SIMD_WIDTH>
924static SIMD_INLINE Vec<T, SIMD_WIDTH> cmplt(const Vec<T, SIMD_WIDTH> &a,
925 const Vec<T, SIMD_WIDTH> &b)
926{
927 return internal::base::cmplt(a, b);
928}
929
944template <typename T, size_t SIMD_WIDTH>
945static SIMD_INLINE Vec<T, SIMD_WIDTH> cmple(const Vec<T, SIMD_WIDTH> &a,
946 const Vec<T, SIMD_WIDTH> &b)
947{
948 return internal::base::cmple(a, b);
949}
950
965template <typename T, size_t SIMD_WIDTH>
966static SIMD_INLINE Vec<T, SIMD_WIDTH> cmpeq(const Vec<T, SIMD_WIDTH> &a,
967 const Vec<T, SIMD_WIDTH> &b)
968{
969 return internal::base::cmpeq(a, b);
970}
971
986template <typename T, size_t SIMD_WIDTH>
987static SIMD_INLINE Vec<T, SIMD_WIDTH> cmpge(const Vec<T, SIMD_WIDTH> &a,
988 const Vec<T, SIMD_WIDTH> &b)
989{
990 return internal::base::cmpge(a, b);
991}
992
1007template <typename T, size_t SIMD_WIDTH>
1008static SIMD_INLINE Vec<T, SIMD_WIDTH> cmpgt(const Vec<T, SIMD_WIDTH> &a,
1009 const Vec<T, SIMD_WIDTH> &b)
1010{
1011 return internal::base::cmpgt(a, b);
1012}
1013
1028template <typename T, size_t SIMD_WIDTH>
1030 const Vec<T, SIMD_WIDTH> &b)
1031{
1032 return internal::base::cmpneq(a, b);
1033}
1034
1041template <typename T, size_t SIMD_WIDTH>
1042static SIMD_INLINE bool test_all_zeros(const Vec<T, SIMD_WIDTH> &a)
1043{
1044 return internal::base::test_all_zeros(a);
1045}
1046
1053template <typename T, size_t SIMD_WIDTH>
1054static SIMD_INLINE bool test_all_ones(const Vec<T, SIMD_WIDTH> &a)
1055{
1056 return internal::base::test_all_ones(a);
1057}
1058
1071template <size_t INDEX, typename T, size_t SIMD_WIDTH>
1072static SIMD_INLINE T extract(const Vec<T, SIMD_WIDTH> &a)
1073{
1074 return internal::base::extract<INDEX>(a);
1075}
1076
1085template <size_t LANE_INDEX, typename T, size_t SIMD_WIDTH>
1086static SIMD_INLINE Vec<T, 16> extractLane(const Vec<T, SIMD_WIDTH> &a)
1087{
1088 static_assert(LANE_INDEX < SIMD_WIDTH / 16,
1089 "LANE_INDEX must be less than SIMD_WIDTH / 16");
1090 return internal::base::extractLane<LANE_INDEX>(a);
1091}
1092
1100template <typename T, size_t SIMD_WIDTH>
1102{
1103 return internal::base::reverse(a);
1104}
1105
1140template <size_t N, typename T, size_t SIMD_WIDTH>
1141static SIMD_INLINE void swizzle(Vec<T, SIMD_WIDTH> v[N])
1142{
1143 static_assert(N >= 1 && N <= 5, "N must be between 1 and 5");
1144 internal::base::swizzle(v, internal::Integer<N>());
1145}
1146
1168template <size_t PART, size_t NUM_ELEMS, typename T, size_t SIMD_WIDTH>
1170 const Vec<T, SIMD_WIDTH> &b)
1171{
1172 static_assert(PART == 0 || PART == 1, "PART must be 0 or 1");
1173 static_assert(NUM_ELEMS <= Vec<T, SIMD_WIDTH>::elements / 2,
1174 "NUM_ELEMS must be at most half of one Vec");
1175 static_assert(NUM_ELEMS > 0 && (NUM_ELEMS & (NUM_ELEMS - 1)) == 0,
1176 "NUM_ELEMS must be a power of two");
1177 return internal::base::unpack(a, b, internal::Part<PART>(),
1178 internal::Bytes<NUM_ELEMS * sizeof(T)>());
1179}
1180
1208template <size_t PART, size_t NUM_ELEMS, typename T, size_t SIMD_WIDTH>
1210 const Vec<T, SIMD_WIDTH> &b)
1211{
1212 static_assert(PART == 0 || PART == 1, "PART must be 0 or 1");
1213 static_assert(NUM_ELEMS <= Vec<T, SIMD_WIDTH>::elements / 2,
1214 "NUM_ELEMS must be at most half of one Vec");
1215 static_assert(NUM_ELEMS > 0 && (NUM_ELEMS & (NUM_ELEMS - 1)) == 0,
1216 "NUM_ELEMS must be a power of two");
1217 return internal::base::unpack16(a, b, internal::Part<PART>(),
1218 internal::Bytes<NUM_ELEMS * sizeof(T)>());
1219}
1220
1246template <size_t NUM_ELEMS, typename T, size_t SIMD_WIDTH>
1247static SIMD_INLINE void zip(const Vec<T, SIMD_WIDTH> a,
1250{
1251 static_assert(NUM_ELEMS <= Vec<T, SIMD_WIDTH>::elements / 2,
1252 "NUM_ELEMS must be at most half of one Vec");
1253 static_assert(NUM_ELEMS > 0 && (NUM_ELEMS & (NUM_ELEMS - 1)) == 0,
1254 "NUM_ELEMS must be a power of two");
1255 return internal::base::zip<NUM_ELEMS>(a, b, l, h);
1256}
1257
1285template <size_t NUM_ELEMS, typename T, size_t SIMD_WIDTH>
1286static SIMD_INLINE void zip16(const Vec<T, SIMD_WIDTH> a,
1289{
1290 static_assert(NUM_ELEMS <= Vec<T, SIMD_WIDTH>::elements / 2,
1291 "NUM_ELEMS must be at most half of one Vec");
1292 static_assert(NUM_ELEMS > 0 && (NUM_ELEMS & (NUM_ELEMS - 1)) == 0,
1293 "NUM_ELEMS must be a power of two");
1294 return internal::base::zip16<NUM_ELEMS>(a, b, l, h);
1295}
1296
1315template <size_t NUM_ELEMS, typename T, size_t SIMD_WIDTH>
1316static SIMD_INLINE void unzip(const Vec<T, SIMD_WIDTH> a,
1319{
1320 static_assert(NUM_ELEMS <= Vec<T, SIMD_WIDTH>::elements / 2,
1321 "NUM_ELEMS must be at most half of one Vec");
1322 static_assert(NUM_ELEMS > 0 && (NUM_ELEMS & (NUM_ELEMS - 1)) == 0,
1323 "NUM_ELEMS must be a power of two");
1324 return internal::base::unzip(a, b, l, h,
1325 internal::Bytes<NUM_ELEMS * sizeof(T)>());
1326}
1327
1337template <size_t COUNT, typename T, size_t SIMD_WIDTH>
1338static SIMD_INLINE Vec<T, SIMD_WIDTH> srle(const Vec<T, SIMD_WIDTH> &a)
1339{
1340 return internal::base::srle<COUNT>(a);
1341}
1342
1352template <size_t COUNT, typename T, size_t SIMD_WIDTH>
1353static SIMD_INLINE Vec<T, SIMD_WIDTH> slle(const Vec<T, SIMD_WIDTH> &a)
1354{
1355 return internal::base::slle<COUNT>(a);
1356}
1357
1369template <size_t COUNT, typename T, size_t SIMD_WIDTH>
1371 const Vec<T, SIMD_WIDTH> &l)
1372{
1373 return internal::base::alignre<COUNT>(h, l);
1374}
1375
1396template <typename Tout, typename Tin, size_t SIMD_WIDTH>
1398 const Vec<Tin, SIMD_WIDTH> &b)
1399{
1400 return internal::base::packs(a, b, internal::OutputType<Tout>());
1401}
1402
1422template <typename Tout, typename Tin, size_t SIMD_WIDTH>
1423static SIMD_INLINE void extend(
1424 const Vec<Tin, SIMD_WIDTH> &vIn,
1425 Vec<Tout, SIMD_WIDTH> vOut[sizeof(Tout) / sizeof(Tin)])
1426{
1427 return internal::base::extend(vIn, vOut);
1428}
1429
1444template <typename Tout, typename Tin, size_t SIMD_WIDTH>
1446{
1447 static_assert(sizeof(Tout) == sizeof(Tin),
1448 "Tout and Tin must be the same size");
1449 static_assert(std::is_floating_point<Tout>::value !=
1450 std::is_floating_point<Tin>::value,
1451 "exactly one of Tout and Tin must be a floating point type");
1452 return internal::base::cvts(a, internal::OutputType<Tout>());
1453}
1454
1456} // namespace simd
1457
1458#endif // SIMD_VEC_BASE_H_
SIMD vector class, holds multiple elements of the same type.
Definition vec.H:75
static Vec< T, SIMD_WIDTH > sub(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Subtracts the elements of two Vec's.
Definition base.H:388
static Vec< T, SIMD_WIDTH > subs(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Subtracts the elements of two Vec's using saturated arithmetic.
Definition base.H:405
static Vec< T, SIMD_WIDTH > avg(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the average of the elements of two Vec's, rounded up.
Definition base.H:456
static Vec< T, SIMD_WIDTH > adds(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Adds the elements of two Vec's using saturated arithmetic.
Definition base.H:374
static Vec< T, SIMD_WIDTH > div(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Divides the elements of two Vec's.
Definition base.H:439
static Vec< T, SIMD_WIDTH > add(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Adds the elements of two Vec's.
Definition base.H:357
static Vec< T, SIMD_WIDTH > mul(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Multiplies the elements of two Vec's.
Definition base.H:421
static Vec< T, SIMD_WIDTH > cmplt(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for less-than ( < ).
Definition base.H:924
static bool test_all_ones(const Vec< T, SIMD_WIDTH > &a)
Tests if all bits of a Vec are one.
Definition base.H:1054
static Vec< T, SIMD_WIDTH > cmple(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for less-than-or-equal ( <= ).
Definition base.H:945
static bool test_all_zeros(const Vec< T, SIMD_WIDTH > &a)
Tests if all bits of a Vec are zero.
Definition base.H:1042
static Vec< T, SIMD_WIDTH > cmpneq(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for inequality ( != ).
Definition base.H:1029
static Vec< T, SIMD_WIDTH > cmpge(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for greater-than-or-equal ( >= ).
Definition base.H:987
static Vec< T, SIMD_WIDTH > cmpgt(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for greater-than ( > ).
Definition base.H:1008
static Vec< T, SIMD_WIDTH > cmpeq(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for equality ( == ).
Definition base.H:966
static Vec< T, SIMD_WIDTH > ifelse(const Vec< Tcond, SIMD_WIDTH > &cond, const Vec< T, SIMD_WIDTH > &trueVal, const Vec< T, SIMD_WIDTH > &falseVal)
Selects elements from two Vec's based on a condition Vec.
Definition base.H:126
static Vec< T, SIMD_WIDTH > slle(const Vec< T, SIMD_WIDTH > &a)
Shifts a Vec left by a constant number of elements, shifting in zero elements.
Definition base.H:1353
static Vec< T, SIMD_WIDTH > srle(const Vec< T, SIMD_WIDTH > &a)
Shifts a Vec right by a constant number of elements, shifting in zero elements.
Definition base.H:1338
static Vec< T, SIMD_WIDTH > alignre(const Vec< T, SIMD_WIDTH > &h, const Vec< T, SIMD_WIDTH > &l)
Concatenates two Vec's, shifts the result right by a constant number of elements, and returns the low...
Definition base.H:1370
static T extract(const Vec< T, SIMD_WIDTH > &a)
Extracts a single value from a Vec.
Definition base.H:1072
static Vec< T, 16 > extractLane(const Vec< T, SIMD_WIDTH > &a)
Extracts a 16-byte lane from a Vec as a Vec < T, 16 >.
Definition base.H:1086
static Vec< T, SIMD_WIDTH > hsubs(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally subtracts adjacent elements of two Vec's with saturation.
Definition base.H:523
static Vec< T, SIMD_WIDTH > hadds(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally adds adjacent elements of two Vec's with saturation.
Definition base.H:493
static Vec< T, SIMD_WIDTH > hadd(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally adds adjacent elements of two Vec's.
Definition base.H:477
static Vec< T, SIMD_WIDTH > hsub(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally subtracts adjacent elements of two Vec's.
Definition base.H:507
static Vec< T, SIMD_WIDTH > iota()
Creates a Vec with sequentially increasing numbers, starting with 0.
Definition base.H:106
static Vec< T, SIMD_WIDTH > int2bits(const uint64_t a)
Sets all bits of each element of a Vec to the corresponding bit of an integer.
Definition base.H:190
static uint64_t msb2int(const Vec< T, SIMD_WIDTH > &a)
Collects the most significant bit of each element of a Vec into an integer.
Definition base.H:147
static Vec< T, SIMD_WIDTH > int2msb(const uint64_t a)
Sets the most significant bit of each element of a Vec to the corresponding bit of an integer.
Definition base.H:169
static Vec< T, SIMD_WIDTH > setzero()
Returns a Vec with all elements set to zero.
Definition base.H:70
static Vec< T, SIMD_WIDTH > set1(const dont_deduce< T > a)
Returns a Vec with all elements set to the same value.
Definition base.H:88
static Vec< T, SIMD_WIDTH > bit_andnot(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise ANDNOT of two Vec's.
Definition base.H:762
static Vec< T, SIMD_WIDTH > bit_and(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise AND of two Vec's.
Definition base.H:732
static Vec< T, SIMD_WIDTH > bit_xor(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise XOR of two Vec's.
Definition base.H:776
static Vec< T, SIMD_WIDTH > bit_or(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise OR of two Vec's.
Definition base.H:746
static Vec< T, SIMD_WIDTH > bit_not(const Vec< T, SIMD_WIDTH > &a)
Computes the bitwise NOT of a Vec.
Definition base.H:789
static Vec< T, SIMD_WIDTH > sqrt(const Vec< T, SIMD_WIDTH > &a)
Computes the square root of the elements of a Vec.
Definition base.H:584
static Vec< T, SIMD_WIDTH > rcp(const Vec< T, SIMD_WIDTH > &a)
Computes the approximate reciprocal of the elements of a Vec.
Definition base.H:547
static Vec< T, SIMD_WIDTH > rsqrt(const Vec< T, SIMD_WIDTH > &a)
Computes the approximate reciprocal square root of the elements of a Vec.
Definition base.H:565
static Vec< T, SIMD_WIDTH > truncate(const Vec< T, SIMD_WIDTH > &a)
Truncates the elements of a Vec to the nearest integer i.e. rounds towards zero.
Definition base.H:712
static Vec< T, SIMD_WIDTH > min(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the minimum of the elements of two Vec's.
Definition base.H:606
static Vec< T, SIMD_WIDTH > floor(const Vec< T, SIMD_WIDTH > &a)
Rounds the elements of a Vec down to the nearest integer.
Definition base.H:683
static Vec< T, SIMD_WIDTH > max(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the maximum of the elements of two Vec's.
Definition base.H:620
static Vec< T, SIMD_WIDTH > ceil(const Vec< T, SIMD_WIDTH > &a)
Rounds the elements of a Vec up to the nearest integer.
Definition base.H:668
static Vec< T, SIMD_WIDTH > neg(const Vec< T, SIMD_WIDTH > &a)
Negates the elements of a Vec.
Definition base.H:635
static Vec< T, SIMD_WIDTH > round(const Vec< T, SIMD_WIDTH > &a)
Rounds the elements of a Vec to the nearest integer.
Definition base.H:697
static Vec< T, SIMD_WIDTH > abs(const Vec< T, SIMD_WIDTH > &a)
Computes the absolute value of the elements of a Vec.
Definition base.H:654
static Vec< T, SIMD_WIDTH > load(const T *const p)
Loads a Vec from aligned memory.
Definition base.H:209
static Vec< T, SIMD_WIDTH > loadu(const T *const p)
Loads a Vec from unaligned memory.
Definition base.H:231
static void store(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to aligned memory.
Definition base.H:246
static void storeu(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to unaligned memory.
Definition base.H:265
static void stream_store(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to aligned memory using a non-temporal memory hint.
Definition base.H:286
static void mfence()
Full memory fence.
Definition base.H:337
static void sfence()
Store fence.
Definition base.H:324
static void lfence()
Load fence.
Definition base.H:309
static Vec< T, SIMD_WIDTH > reverse(const Vec< T, SIMD_WIDTH > &a)
Reverses the order of the elements of a Vec.
Definition base.H:1101
static Vec< T, SIMD_WIDTH > sra(const Vec< T, SIMD_WIDTH > &a, const uint8_t count)
Shifts the elements of a Vec right by a variable number of bits while shifting in the sign bit.
Definition base.H:862
static Vec< T, SIMD_WIDTH > srli(const Vec< T, SIMD_WIDTH > &a)
Shifts the elements of a Vec right by a constant number of bits while shifting in zeros.
Definition base.H:828
static Vec< T, SIMD_WIDTH > sll(const Vec< T, SIMD_WIDTH > &a, const uint8_t count)
Shifts the elements of a Vec left by a variable number of bits while shifting in zeros.
Definition base.H:896
static Vec< T, SIMD_WIDTH > slli(const Vec< T, SIMD_WIDTH > &a)
Shifts the elements of a Vec left by a constant number of bits while shifting in zeros.
Definition base.H:844
static Vec< T, SIMD_WIDTH > srai(const Vec< T, SIMD_WIDTH > &a)
Shifts the elements of a Vec right by a constant number of bits while shifting in the sign bit.
Definition base.H:812
static Vec< T, SIMD_WIDTH > srl(const Vec< T, SIMD_WIDTH > &a, const uint8_t count)
Shifts the elements of a Vec right by a variable number of bits while shifting in zeros.
Definition base.H:879
static void swizzle(Vec< T, SIMD_WIDTH > v[N])
Swizzle/de-interleave/convert from AoS to SoA multiple Vec's in-place.
Definition base.H:1141
static Vec< Tout, SIMD_WIDTH > cvts(const Vec< Tin, SIMD_WIDTH > &a)
Converts the elements of a Vec between integer and floating point types of the same size.
Definition base.H:1445
static Vec< Tout, SIMD_WIDTH > packs(const Vec< Tin, SIMD_WIDTH > &a, const Vec< Tin, SIMD_WIDTH > &b)
Packs two Vec's into one by converting the elements into the next smaller type with saturation.
Definition base.H:1397
static void extend(const Vec< Tin, SIMD_WIDTH > &vIn, Vec< Tout, SIMD_WIDTH > vOut[sizeof(Tout)/sizeof(Tin)])
Extends the elements of a Vec to a larger or equally sized type.
Definition base.H:1423
static Vec< Tout, SIMD_WIDTH > reinterpret(const Vec< Tin, SIMD_WIDTH > &a)
Reinterprets a given Vec as a Vec with a different element type.
Definition base.H:58
static void zip16(const Vec< T, SIMD_WIDTH > a, const Vec< T, SIMD_WIDTH > b, Vec< T, SIMD_WIDTH > &l, Vec< T, SIMD_WIDTH > &h)
Interleaves blocks of elements of each 16-byte lane of two Vec's.
Definition base.H:1286
static void zip(const Vec< T, SIMD_WIDTH > a, const Vec< T, SIMD_WIDTH > b, Vec< T, SIMD_WIDTH > &l, Vec< T, SIMD_WIDTH > &h)
Interleaves blocks of elements of two Vec's.
Definition base.H:1247
static Vec< T, SIMD_WIDTH > unpack(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Interleaves blocks of elements from the high or low half of two Vec's.
Definition base.H:1169
static void unzip(const Vec< T, SIMD_WIDTH > a, const Vec< T, SIMD_WIDTH > b, Vec< T, SIMD_WIDTH > &l, Vec< T, SIMD_WIDTH > &h)
Deinterleaves blocks of elements of two Vec's.
Definition base.H:1316
static Vec< T, SIMD_WIDTH > unpack16(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Interleaves blocks of elements from the high or low half of each 16-byte lane of two Vec's.
Definition base.H:1209
Namespace for T-SIMD.
Definition time_measurement.H:161
typename internal::dont_deduce< T >::type dont_deduce
Helper type to prevent template argument deduction.
Definition types.H:416
Type information for SIMD types.
Definition types.H:257